{ "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0, "eval_steps": 500, "global_step": 10989, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "grad_norm": 24.963009671393625, "learning_rate": 3.0303030303030305e-08, "loss": 0.7908, "step": 1 }, { "epoch": 0.0, "grad_norm": 22.095744933985866, "learning_rate": 6.060606060606061e-08, "loss": 0.6598, "step": 2 }, { "epoch": 0.0, "grad_norm": 25.69702268094129, "learning_rate": 9.090909090909091e-08, "loss": 0.753, "step": 3 }, { "epoch": 0.0, "grad_norm": 23.044986917244724, "learning_rate": 1.2121212121212122e-07, "loss": 0.7035, "step": 4 }, { "epoch": 0.0, "grad_norm": 22.812274623010673, "learning_rate": 1.5151515151515152e-07, "loss": 0.6897, "step": 5 }, { "epoch": 0.0, "grad_norm": 21.60420722502775, "learning_rate": 1.8181818181818183e-07, "loss": 0.6663, "step": 6 }, { "epoch": 0.0, "grad_norm": 24.887572617589537, "learning_rate": 2.1212121212121216e-07, "loss": 0.7324, "step": 7 }, { "epoch": 0.0, "grad_norm": 20.1023251864958, "learning_rate": 2.4242424242424244e-07, "loss": 0.6696, "step": 8 }, { "epoch": 0.0, "grad_norm": 19.88183998563793, "learning_rate": 2.7272727272727274e-07, "loss": 0.7208, "step": 9 }, { "epoch": 0.0, "grad_norm": 15.53190036199524, "learning_rate": 3.0303030303030305e-07, "loss": 0.6542, "step": 10 }, { "epoch": 0.0, "grad_norm": 14.172514534798532, "learning_rate": 3.3333333333333335e-07, "loss": 0.6134, "step": 11 }, { "epoch": 0.0, "grad_norm": 14.64664638504347, "learning_rate": 3.6363636363636366e-07, "loss": 0.6765, "step": 12 }, { "epoch": 0.0, "grad_norm": 14.0733259638494, "learning_rate": 3.9393939393939396e-07, "loss": 0.6455, "step": 13 }, { "epoch": 0.0, "grad_norm": 9.454225618738189, "learning_rate": 4.242424242424243e-07, "loss": 0.5932, "step": 14 }, { "epoch": 0.0, "grad_norm": 9.586251337949811, "learning_rate": 4.5454545454545457e-07, "loss": 0.5715, "step": 15 }, { "epoch": 0.0, "grad_norm": 9.675021673294397, "learning_rate": 4.848484848484849e-07, "loss": 0.5912, "step": 16 }, { "epoch": 0.0, "grad_norm": 9.270919680918658, "learning_rate": 5.151515151515152e-07, "loss": 0.5805, "step": 17 }, { "epoch": 0.0, "grad_norm": 9.306759889818276, "learning_rate": 5.454545454545455e-07, "loss": 0.5568, "step": 18 }, { "epoch": 0.01, "grad_norm": 9.34247172442105, "learning_rate": 5.757575757575758e-07, "loss": 0.6023, "step": 19 }, { "epoch": 0.01, "grad_norm": 9.58882846298745, "learning_rate": 6.060606060606061e-07, "loss": 0.5345, "step": 20 }, { "epoch": 0.01, "grad_norm": 10.106751923119504, "learning_rate": 6.363636363636364e-07, "loss": 0.5005, "step": 21 }, { "epoch": 0.01, "grad_norm": 9.46482346022856, "learning_rate": 6.666666666666667e-07, "loss": 0.5472, "step": 22 }, { "epoch": 0.01, "grad_norm": 9.834331230088742, "learning_rate": 6.969696969696971e-07, "loss": 0.5666, "step": 23 }, { "epoch": 0.01, "grad_norm": 9.025044482419066, "learning_rate": 7.272727272727273e-07, "loss": 0.545, "step": 24 }, { "epoch": 0.01, "grad_norm": 8.509716871511943, "learning_rate": 7.575757575757576e-07, "loss": 0.6096, "step": 25 }, { "epoch": 0.01, "grad_norm": 7.694280082090164, "learning_rate": 7.878787878787879e-07, "loss": 0.5522, "step": 26 }, { "epoch": 0.01, "grad_norm": 7.000787108434309, "learning_rate": 8.181818181818182e-07, "loss": 0.5293, "step": 27 }, { "epoch": 0.01, "grad_norm": 7.268597340283241, "learning_rate": 8.484848484848486e-07, "loss": 0.5282, "step": 28 }, { "epoch": 0.01, "grad_norm": 6.682821541492869, "learning_rate": 8.787878787878788e-07, "loss": 0.4408, "step": 29 }, { "epoch": 0.01, "grad_norm": 7.1023797679311205, "learning_rate": 9.090909090909091e-07, "loss": 0.4725, "step": 30 }, { "epoch": 0.01, "grad_norm": 7.787773096933421, "learning_rate": 9.393939393939395e-07, "loss": 0.5228, "step": 31 }, { "epoch": 0.01, "grad_norm": 7.357141474052988, "learning_rate": 9.696969696969698e-07, "loss": 0.4875, "step": 32 }, { "epoch": 0.01, "grad_norm": 8.03764502344773, "learning_rate": 1.0000000000000002e-06, "loss": 0.5957, "step": 33 }, { "epoch": 0.01, "grad_norm": 7.559939494819406, "learning_rate": 1.0303030303030304e-06, "loss": 0.4828, "step": 34 }, { "epoch": 0.01, "grad_norm": 6.933533714084318, "learning_rate": 1.0606060606060608e-06, "loss": 0.4173, "step": 35 }, { "epoch": 0.01, "grad_norm": 6.4322447547597355, "learning_rate": 1.090909090909091e-06, "loss": 0.4351, "step": 36 }, { "epoch": 0.01, "grad_norm": 6.8341909409978765, "learning_rate": 1.1212121212121214e-06, "loss": 0.4846, "step": 37 }, { "epoch": 0.01, "grad_norm": 7.024296627960633, "learning_rate": 1.1515151515151516e-06, "loss": 0.4643, "step": 38 }, { "epoch": 0.01, "grad_norm": 6.6892465548146225, "learning_rate": 1.181818181818182e-06, "loss": 0.4609, "step": 39 }, { "epoch": 0.01, "grad_norm": 6.375897228313598, "learning_rate": 1.2121212121212122e-06, "loss": 0.4192, "step": 40 }, { "epoch": 0.01, "grad_norm": 6.239571486593917, "learning_rate": 1.2424242424242424e-06, "loss": 0.4234, "step": 41 }, { "epoch": 0.01, "grad_norm": 6.189995411250279, "learning_rate": 1.2727272727272728e-06, "loss": 0.4106, "step": 42 }, { "epoch": 0.01, "grad_norm": 6.251701313336269, "learning_rate": 1.3030303030303032e-06, "loss": 0.4673, "step": 43 }, { "epoch": 0.01, "grad_norm": 6.416210270505744, "learning_rate": 1.3333333333333334e-06, "loss": 0.48, "step": 44 }, { "epoch": 0.01, "grad_norm": 5.956862036107494, "learning_rate": 1.3636363636363636e-06, "loss": 0.4189, "step": 45 }, { "epoch": 0.01, "grad_norm": 6.631065755223114, "learning_rate": 1.3939393939393942e-06, "loss": 0.469, "step": 46 }, { "epoch": 0.01, "grad_norm": 6.143206429696774, "learning_rate": 1.4242424242424244e-06, "loss": 0.4748, "step": 47 }, { "epoch": 0.01, "grad_norm": 5.6352843604807195, "learning_rate": 1.4545454545454546e-06, "loss": 0.4002, "step": 48 }, { "epoch": 0.01, "grad_norm": 5.6503288524640585, "learning_rate": 1.484848484848485e-06, "loss": 0.4248, "step": 49 }, { "epoch": 0.01, "grad_norm": 5.922470709983655, "learning_rate": 1.5151515151515152e-06, "loss": 0.426, "step": 50 }, { "epoch": 0.01, "grad_norm": 6.337156233437923, "learning_rate": 1.5454545454545454e-06, "loss": 0.4249, "step": 51 }, { "epoch": 0.01, "grad_norm": 5.414700581820504, "learning_rate": 1.5757575757575759e-06, "loss": 0.3907, "step": 52 }, { "epoch": 0.01, "grad_norm": 5.827231932409133, "learning_rate": 1.6060606060606063e-06, "loss": 0.4119, "step": 53 }, { "epoch": 0.01, "grad_norm": 5.804412312797118, "learning_rate": 1.6363636363636365e-06, "loss": 0.3846, "step": 54 }, { "epoch": 0.02, "grad_norm": 5.975492832662959, "learning_rate": 1.6666666666666667e-06, "loss": 0.4187, "step": 55 }, { "epoch": 0.02, "grad_norm": 5.749363114201106, "learning_rate": 1.6969696969696973e-06, "loss": 0.3686, "step": 56 }, { "epoch": 0.02, "grad_norm": 6.693761925144835, "learning_rate": 1.7272727272727275e-06, "loss": 0.4609, "step": 57 }, { "epoch": 0.02, "grad_norm": 6.040286815075631, "learning_rate": 1.7575757575757577e-06, "loss": 0.4155, "step": 58 }, { "epoch": 0.02, "grad_norm": 5.575731911956731, "learning_rate": 1.787878787878788e-06, "loss": 0.3466, "step": 59 }, { "epoch": 0.02, "grad_norm": 6.259506414275161, "learning_rate": 1.8181818181818183e-06, "loss": 0.4287, "step": 60 }, { "epoch": 0.02, "grad_norm": 5.583304081613292, "learning_rate": 1.8484848484848487e-06, "loss": 0.3393, "step": 61 }, { "epoch": 0.02, "grad_norm": 6.260371222704529, "learning_rate": 1.878787878787879e-06, "loss": 0.4168, "step": 62 }, { "epoch": 0.02, "grad_norm": 6.191282588576895, "learning_rate": 1.9090909090909095e-06, "loss": 0.4385, "step": 63 }, { "epoch": 0.02, "grad_norm": 6.464209068139261, "learning_rate": 1.9393939393939395e-06, "loss": 0.4007, "step": 64 }, { "epoch": 0.02, "grad_norm": 5.662211296069837, "learning_rate": 1.96969696969697e-06, "loss": 0.3577, "step": 65 }, { "epoch": 0.02, "grad_norm": 5.775996102629911, "learning_rate": 2.0000000000000003e-06, "loss": 0.3989, "step": 66 }, { "epoch": 0.02, "grad_norm": 6.011144490054664, "learning_rate": 2.0303030303030303e-06, "loss": 0.3788, "step": 67 }, { "epoch": 0.02, "grad_norm": 5.953400646654258, "learning_rate": 2.0606060606060607e-06, "loss": 0.4083, "step": 68 }, { "epoch": 0.02, "grad_norm": 6.052280696703109, "learning_rate": 2.090909090909091e-06, "loss": 0.367, "step": 69 }, { "epoch": 0.02, "grad_norm": 5.9004762598977125, "learning_rate": 2.1212121212121216e-06, "loss": 0.3817, "step": 70 }, { "epoch": 0.02, "grad_norm": 5.762973742327527, "learning_rate": 2.1515151515151515e-06, "loss": 0.4092, "step": 71 }, { "epoch": 0.02, "grad_norm": 5.940872146814278, "learning_rate": 2.181818181818182e-06, "loss": 0.3763, "step": 72 }, { "epoch": 0.02, "grad_norm": 5.497505091944975, "learning_rate": 2.2121212121212124e-06, "loss": 0.3936, "step": 73 }, { "epoch": 0.02, "grad_norm": 5.591034031060306, "learning_rate": 2.2424242424242428e-06, "loss": 0.3623, "step": 74 }, { "epoch": 0.02, "grad_norm": 5.879686854503531, "learning_rate": 2.2727272727272728e-06, "loss": 0.4042, "step": 75 }, { "epoch": 0.02, "grad_norm": 5.660394276940384, "learning_rate": 2.303030303030303e-06, "loss": 0.3604, "step": 76 }, { "epoch": 0.02, "grad_norm": 5.875393281807664, "learning_rate": 2.3333333333333336e-06, "loss": 0.3743, "step": 77 }, { "epoch": 0.02, "grad_norm": 5.948837592400559, "learning_rate": 2.363636363636364e-06, "loss": 0.3633, "step": 78 }, { "epoch": 0.02, "grad_norm": 5.937301466289537, "learning_rate": 2.393939393939394e-06, "loss": 0.396, "step": 79 }, { "epoch": 0.02, "grad_norm": 5.950484159292576, "learning_rate": 2.4242424242424244e-06, "loss": 0.3806, "step": 80 }, { "epoch": 0.02, "grad_norm": 5.46072994196432, "learning_rate": 2.454545454545455e-06, "loss": 0.3086, "step": 81 }, { "epoch": 0.02, "grad_norm": 6.390828680491305, "learning_rate": 2.4848484848484848e-06, "loss": 0.4507, "step": 82 }, { "epoch": 0.02, "grad_norm": 5.557633452416991, "learning_rate": 2.5151515151515156e-06, "loss": 0.3172, "step": 83 }, { "epoch": 0.02, "grad_norm": 6.382270030708564, "learning_rate": 2.5454545454545456e-06, "loss": 0.3918, "step": 84 }, { "epoch": 0.02, "grad_norm": 6.234534643265852, "learning_rate": 2.575757575757576e-06, "loss": 0.382, "step": 85 }, { "epoch": 0.02, "grad_norm": 6.057527792062359, "learning_rate": 2.6060606060606064e-06, "loss": 0.4154, "step": 86 }, { "epoch": 0.02, "grad_norm": 5.577604785412596, "learning_rate": 2.6363636363636364e-06, "loss": 0.3612, "step": 87 }, { "epoch": 0.02, "grad_norm": 5.645821291016337, "learning_rate": 2.666666666666667e-06, "loss": 0.3735, "step": 88 }, { "epoch": 0.02, "grad_norm": 5.470086629186744, "learning_rate": 2.6969696969696972e-06, "loss": 0.3597, "step": 89 }, { "epoch": 0.02, "grad_norm": 5.614806605418893, "learning_rate": 2.7272727272727272e-06, "loss": 0.3958, "step": 90 }, { "epoch": 0.02, "grad_norm": 5.491349384324725, "learning_rate": 2.7575757575757576e-06, "loss": 0.3258, "step": 91 }, { "epoch": 0.03, "grad_norm": 5.601731049079032, "learning_rate": 2.7878787878787885e-06, "loss": 0.3725, "step": 92 }, { "epoch": 0.03, "grad_norm": 5.427486992793585, "learning_rate": 2.818181818181818e-06, "loss": 0.343, "step": 93 }, { "epoch": 0.03, "grad_norm": 6.027874471200769, "learning_rate": 2.848484848484849e-06, "loss": 0.4245, "step": 94 }, { "epoch": 0.03, "grad_norm": 5.254502616168089, "learning_rate": 2.8787878787878793e-06, "loss": 0.3527, "step": 95 }, { "epoch": 0.03, "grad_norm": 5.213165569079894, "learning_rate": 2.9090909090909093e-06, "loss": 0.336, "step": 96 }, { "epoch": 0.03, "grad_norm": 6.295188374328629, "learning_rate": 2.9393939393939397e-06, "loss": 0.3424, "step": 97 }, { "epoch": 0.03, "grad_norm": 6.391180680688915, "learning_rate": 2.96969696969697e-06, "loss": 0.4139, "step": 98 }, { "epoch": 0.03, "grad_norm": 5.549063964880351, "learning_rate": 3e-06, "loss": 0.3561, "step": 99 }, { "epoch": 0.03, "grad_norm": 5.377539803863616, "learning_rate": 3.0303030303030305e-06, "loss": 0.358, "step": 100 }, { "epoch": 0.03, "grad_norm": 6.049776567725781, "learning_rate": 3.0606060606060605e-06, "loss": 0.419, "step": 101 }, { "epoch": 0.03, "grad_norm": 5.826287611008103, "learning_rate": 3.090909090909091e-06, "loss": 0.3643, "step": 102 }, { "epoch": 0.03, "grad_norm": 5.734091355477589, "learning_rate": 3.1212121212121217e-06, "loss": 0.3581, "step": 103 }, { "epoch": 0.03, "grad_norm": 5.545001951779309, "learning_rate": 3.1515151515151517e-06, "loss": 0.4023, "step": 104 }, { "epoch": 0.03, "grad_norm": 5.7114006714939105, "learning_rate": 3.181818181818182e-06, "loss": 0.4039, "step": 105 }, { "epoch": 0.03, "grad_norm": 5.688865981710678, "learning_rate": 3.2121212121212125e-06, "loss": 0.3779, "step": 106 }, { "epoch": 0.03, "grad_norm": 5.360055231790772, "learning_rate": 3.2424242424242425e-06, "loss": 0.3828, "step": 107 }, { "epoch": 0.03, "grad_norm": 5.4478559409980365, "learning_rate": 3.272727272727273e-06, "loss": 0.3969, "step": 108 }, { "epoch": 0.03, "grad_norm": 5.437423794056705, "learning_rate": 3.3030303030303033e-06, "loss": 0.3817, "step": 109 }, { "epoch": 0.03, "grad_norm": 5.192241324454696, "learning_rate": 3.3333333333333333e-06, "loss": 0.3216, "step": 110 }, { "epoch": 0.03, "grad_norm": 5.518015664701024, "learning_rate": 3.3636363636363637e-06, "loss": 0.3551, "step": 111 }, { "epoch": 0.03, "grad_norm": 5.657918730500032, "learning_rate": 3.3939393939393946e-06, "loss": 0.3804, "step": 112 }, { "epoch": 0.03, "grad_norm": 6.330609247644378, "learning_rate": 3.4242424242424246e-06, "loss": 0.4238, "step": 113 }, { "epoch": 0.03, "grad_norm": 5.7867427615733025, "learning_rate": 3.454545454545455e-06, "loss": 0.356, "step": 114 }, { "epoch": 0.03, "grad_norm": 5.929152315305148, "learning_rate": 3.4848484848484854e-06, "loss": 0.3828, "step": 115 }, { "epoch": 0.03, "grad_norm": 5.506698075720523, "learning_rate": 3.5151515151515154e-06, "loss": 0.3344, "step": 116 }, { "epoch": 0.03, "grad_norm": 5.770793727102921, "learning_rate": 3.5454545454545458e-06, "loss": 0.3798, "step": 117 }, { "epoch": 0.03, "grad_norm": 5.709331141733425, "learning_rate": 3.575757575757576e-06, "loss": 0.4183, "step": 118 }, { "epoch": 0.03, "grad_norm": 5.465020543307715, "learning_rate": 3.606060606060606e-06, "loss": 0.4153, "step": 119 }, { "epoch": 0.03, "grad_norm": 5.421646322435644, "learning_rate": 3.6363636363636366e-06, "loss": 0.393, "step": 120 }, { "epoch": 0.03, "grad_norm": 5.105192436627971, "learning_rate": 3.6666666666666666e-06, "loss": 0.3354, "step": 121 }, { "epoch": 0.03, "grad_norm": 5.3952579878429106, "learning_rate": 3.6969696969696974e-06, "loss": 0.3512, "step": 122 }, { "epoch": 0.03, "grad_norm": 6.2473999885219635, "learning_rate": 3.727272727272728e-06, "loss": 0.3633, "step": 123 }, { "epoch": 0.03, "grad_norm": 5.96772057951783, "learning_rate": 3.757575757575758e-06, "loss": 0.3289, "step": 124 }, { "epoch": 0.03, "grad_norm": 5.818716856381312, "learning_rate": 3.7878787878787882e-06, "loss": 0.342, "step": 125 }, { "epoch": 0.03, "grad_norm": 4.9962235047636225, "learning_rate": 3.818181818181819e-06, "loss": 0.2972, "step": 126 }, { "epoch": 0.03, "grad_norm": 5.345489946333712, "learning_rate": 3.848484848484848e-06, "loss": 0.3567, "step": 127 }, { "epoch": 0.03, "grad_norm": 5.388955400375534, "learning_rate": 3.878787878787879e-06, "loss": 0.3402, "step": 128 }, { "epoch": 0.04, "grad_norm": 5.051885892673309, "learning_rate": 3.90909090909091e-06, "loss": 0.3106, "step": 129 }, { "epoch": 0.04, "grad_norm": 5.372439122822092, "learning_rate": 3.93939393939394e-06, "loss": 0.3512, "step": 130 }, { "epoch": 0.04, "grad_norm": 5.92207286736919, "learning_rate": 3.96969696969697e-06, "loss": 0.3798, "step": 131 }, { "epoch": 0.04, "grad_norm": 5.244384893104628, "learning_rate": 4.000000000000001e-06, "loss": 0.3848, "step": 132 }, { "epoch": 0.04, "grad_norm": 5.443894549057829, "learning_rate": 4.030303030303031e-06, "loss": 0.3594, "step": 133 }, { "epoch": 0.04, "grad_norm": 6.503245419426116, "learning_rate": 4.060606060606061e-06, "loss": 0.3909, "step": 134 }, { "epoch": 0.04, "grad_norm": 5.563029843998091, "learning_rate": 4.0909090909090915e-06, "loss": 0.3797, "step": 135 }, { "epoch": 0.04, "grad_norm": 5.58358025111985, "learning_rate": 4.1212121212121215e-06, "loss": 0.3486, "step": 136 }, { "epoch": 0.04, "grad_norm": 5.488839068680304, "learning_rate": 4.151515151515152e-06, "loss": 0.3477, "step": 137 }, { "epoch": 0.04, "grad_norm": 5.759215057290953, "learning_rate": 4.181818181818182e-06, "loss": 0.4031, "step": 138 }, { "epoch": 0.04, "grad_norm": 5.233461690394125, "learning_rate": 4.212121212121212e-06, "loss": 0.3509, "step": 139 }, { "epoch": 0.04, "grad_norm": 5.58228048027132, "learning_rate": 4.242424242424243e-06, "loss": 0.383, "step": 140 }, { "epoch": 0.04, "grad_norm": 5.493012254311687, "learning_rate": 4.272727272727273e-06, "loss": 0.4095, "step": 141 }, { "epoch": 0.04, "grad_norm": 5.02434810357748, "learning_rate": 4.303030303030303e-06, "loss": 0.3403, "step": 142 }, { "epoch": 0.04, "grad_norm": 5.674592942271979, "learning_rate": 4.333333333333334e-06, "loss": 0.3446, "step": 143 }, { "epoch": 0.04, "grad_norm": 5.553046792559494, "learning_rate": 4.363636363636364e-06, "loss": 0.3852, "step": 144 }, { "epoch": 0.04, "grad_norm": 4.907495033740153, "learning_rate": 4.393939393939394e-06, "loss": 0.3012, "step": 145 }, { "epoch": 0.04, "grad_norm": 4.9635217935573355, "learning_rate": 4.424242424242425e-06, "loss": 0.3147, "step": 146 }, { "epoch": 0.04, "grad_norm": 5.9466591027305125, "learning_rate": 4.454545454545455e-06, "loss": 0.4026, "step": 147 }, { "epoch": 0.04, "grad_norm": 6.028605788131155, "learning_rate": 4.4848484848484855e-06, "loss": 0.3698, "step": 148 }, { "epoch": 0.04, "grad_norm": 4.865447860113725, "learning_rate": 4.5151515151515155e-06, "loss": 0.3047, "step": 149 }, { "epoch": 0.04, "grad_norm": 5.677980463632439, "learning_rate": 4.5454545454545455e-06, "loss": 0.4124, "step": 150 }, { "epoch": 0.04, "grad_norm": 5.0956202927225345, "learning_rate": 4.575757575757576e-06, "loss": 0.3653, "step": 151 }, { "epoch": 0.04, "grad_norm": 4.974959560587927, "learning_rate": 4.606060606060606e-06, "loss": 0.3514, "step": 152 }, { "epoch": 0.04, "grad_norm": 4.9707710431415215, "learning_rate": 4.636363636363636e-06, "loss": 0.338, "step": 153 }, { "epoch": 0.04, "grad_norm": 5.54767464342179, "learning_rate": 4.666666666666667e-06, "loss": 0.3571, "step": 154 }, { "epoch": 0.04, "grad_norm": 5.085419128808824, "learning_rate": 4.696969696969698e-06, "loss": 0.3209, "step": 155 }, { "epoch": 0.04, "grad_norm": 4.776400786739939, "learning_rate": 4.727272727272728e-06, "loss": 0.3478, "step": 156 }, { "epoch": 0.04, "grad_norm": 5.298048639918654, "learning_rate": 4.757575757575758e-06, "loss": 0.3278, "step": 157 }, { "epoch": 0.04, "grad_norm": 5.309330170318605, "learning_rate": 4.787878787878788e-06, "loss": 0.3445, "step": 158 }, { "epoch": 0.04, "grad_norm": 5.079416144435435, "learning_rate": 4.818181818181819e-06, "loss": 0.3588, "step": 159 }, { "epoch": 0.04, "grad_norm": 5.4053480245061465, "learning_rate": 4.848484848484849e-06, "loss": 0.3983, "step": 160 }, { "epoch": 0.04, "grad_norm": 5.1132658608232235, "learning_rate": 4.878787878787879e-06, "loss": 0.3352, "step": 161 }, { "epoch": 0.04, "grad_norm": 5.546328837286164, "learning_rate": 4.90909090909091e-06, "loss": 0.3965, "step": 162 }, { "epoch": 0.04, "grad_norm": 4.91262498808614, "learning_rate": 4.93939393939394e-06, "loss": 0.3584, "step": 163 }, { "epoch": 0.04, "grad_norm": 4.977515764826995, "learning_rate": 4.9696969696969696e-06, "loss": 0.3192, "step": 164 }, { "epoch": 0.05, "grad_norm": 4.576877435480275, "learning_rate": 5e-06, "loss": 0.3568, "step": 165 }, { "epoch": 0.05, "grad_norm": 5.092478277961341, "learning_rate": 5.030303030303031e-06, "loss": 0.3644, "step": 166 }, { "epoch": 0.05, "grad_norm": 5.018685998108373, "learning_rate": 5.060606060606061e-06, "loss": 0.3669, "step": 167 }, { "epoch": 0.05, "grad_norm": 5.374406064793353, "learning_rate": 5.090909090909091e-06, "loss": 0.3466, "step": 168 }, { "epoch": 0.05, "grad_norm": 4.954392818884204, "learning_rate": 5.121212121212121e-06, "loss": 0.354, "step": 169 }, { "epoch": 0.05, "grad_norm": 5.02059888343161, "learning_rate": 5.151515151515152e-06, "loss": 0.3279, "step": 170 }, { "epoch": 0.05, "grad_norm": 5.242788697474413, "learning_rate": 5.181818181818182e-06, "loss": 0.346, "step": 171 }, { "epoch": 0.05, "grad_norm": 5.065887809244624, "learning_rate": 5.212121212121213e-06, "loss": 0.3627, "step": 172 }, { "epoch": 0.05, "grad_norm": 5.359910442453347, "learning_rate": 5.242424242424244e-06, "loss": 0.3731, "step": 173 }, { "epoch": 0.05, "grad_norm": 5.243829282397834, "learning_rate": 5.272727272727273e-06, "loss": 0.3676, "step": 174 }, { "epoch": 0.05, "grad_norm": 5.197560167606785, "learning_rate": 5.303030303030303e-06, "loss": 0.3654, "step": 175 }, { "epoch": 0.05, "grad_norm": 5.359166123924114, "learning_rate": 5.333333333333334e-06, "loss": 0.4098, "step": 176 }, { "epoch": 0.05, "grad_norm": 5.070099999781161, "learning_rate": 5.3636363636363645e-06, "loss": 0.3415, "step": 177 }, { "epoch": 0.05, "grad_norm": 4.890704309531856, "learning_rate": 5.3939393939393945e-06, "loss": 0.3629, "step": 178 }, { "epoch": 0.05, "grad_norm": 4.983436572084698, "learning_rate": 5.424242424242425e-06, "loss": 0.3819, "step": 179 }, { "epoch": 0.05, "grad_norm": 5.3023861417999, "learning_rate": 5.4545454545454545e-06, "loss": 0.3627, "step": 180 }, { "epoch": 0.05, "grad_norm": 5.239807160282111, "learning_rate": 5.484848484848485e-06, "loss": 0.3428, "step": 181 }, { "epoch": 0.05, "grad_norm": 5.128244901835555, "learning_rate": 5.515151515151515e-06, "loss": 0.3806, "step": 182 }, { "epoch": 0.05, "grad_norm": 5.042549030366478, "learning_rate": 5.545454545454546e-06, "loss": 0.3375, "step": 183 }, { "epoch": 0.05, "grad_norm": 4.802789116341605, "learning_rate": 5.575757575757577e-06, "loss": 0.3515, "step": 184 }, { "epoch": 0.05, "grad_norm": 9.657750733656597, "learning_rate": 5.606060606060606e-06, "loss": 0.3982, "step": 185 }, { "epoch": 0.05, "grad_norm": 4.861167868735887, "learning_rate": 5.636363636363636e-06, "loss": 0.3663, "step": 186 }, { "epoch": 0.05, "grad_norm": 4.746241905347231, "learning_rate": 5.666666666666667e-06, "loss": 0.3478, "step": 187 }, { "epoch": 0.05, "grad_norm": 5.101928408899225, "learning_rate": 5.696969696969698e-06, "loss": 0.3248, "step": 188 }, { "epoch": 0.05, "grad_norm": 5.338465728282297, "learning_rate": 5.727272727272728e-06, "loss": 0.3954, "step": 189 }, { "epoch": 0.05, "grad_norm": 6.082123966970521, "learning_rate": 5.7575757575757586e-06, "loss": 0.3599, "step": 190 }, { "epoch": 0.05, "grad_norm": 5.301358545475363, "learning_rate": 5.787878787878788e-06, "loss": 0.3891, "step": 191 }, { "epoch": 0.05, "grad_norm": 4.966892866760607, "learning_rate": 5.8181818181818185e-06, "loss": 0.3225, "step": 192 }, { "epoch": 0.05, "grad_norm": 5.23131765871254, "learning_rate": 5.8484848484848485e-06, "loss": 0.3697, "step": 193 }, { "epoch": 0.05, "grad_norm": 5.736444371073679, "learning_rate": 5.878787878787879e-06, "loss": 0.4188, "step": 194 }, { "epoch": 0.05, "grad_norm": 4.96307753674885, "learning_rate": 5.90909090909091e-06, "loss": 0.3409, "step": 195 }, { "epoch": 0.05, "grad_norm": 5.310009629385978, "learning_rate": 5.93939393939394e-06, "loss": 0.3507, "step": 196 }, { "epoch": 0.05, "grad_norm": 5.373497423064573, "learning_rate": 5.96969696969697e-06, "loss": 0.3731, "step": 197 }, { "epoch": 0.05, "grad_norm": 5.1067076209325135, "learning_rate": 6e-06, "loss": 0.3304, "step": 198 }, { "epoch": 0.05, "grad_norm": 5.228116827660428, "learning_rate": 6.030303030303031e-06, "loss": 0.3886, "step": 199 }, { "epoch": 0.05, "grad_norm": 4.8393324043112065, "learning_rate": 6.060606060606061e-06, "loss": 0.3224, "step": 200 }, { "epoch": 0.05, "grad_norm": 5.351476056036347, "learning_rate": 6.090909090909092e-06, "loss": 0.3879, "step": 201 }, { "epoch": 0.06, "grad_norm": 5.2444602461543965, "learning_rate": 6.121212121212121e-06, "loss": 0.3782, "step": 202 }, { "epoch": 0.06, "grad_norm": 4.418002596987469, "learning_rate": 6.151515151515152e-06, "loss": 0.3348, "step": 203 }, { "epoch": 0.06, "grad_norm": 5.368707185290135, "learning_rate": 6.181818181818182e-06, "loss": 0.3974, "step": 204 }, { "epoch": 0.06, "grad_norm": 5.268123765918797, "learning_rate": 6.212121212121213e-06, "loss": 0.3684, "step": 205 }, { "epoch": 0.06, "grad_norm": 5.381903351059237, "learning_rate": 6.2424242424242434e-06, "loss": 0.4258, "step": 206 }, { "epoch": 0.06, "grad_norm": 5.016630412052827, "learning_rate": 6.2727272727272734e-06, "loss": 0.3784, "step": 207 }, { "epoch": 0.06, "grad_norm": 5.589692142948399, "learning_rate": 6.303030303030303e-06, "loss": 0.3613, "step": 208 }, { "epoch": 0.06, "grad_norm": 5.08075316271684, "learning_rate": 6.333333333333333e-06, "loss": 0.3641, "step": 209 }, { "epoch": 0.06, "grad_norm": 4.737607482154883, "learning_rate": 6.363636363636364e-06, "loss": 0.3245, "step": 210 }, { "epoch": 0.06, "grad_norm": 4.764211700748343, "learning_rate": 6.393939393939394e-06, "loss": 0.3448, "step": 211 }, { "epoch": 0.06, "grad_norm": 5.144947326785369, "learning_rate": 6.424242424242425e-06, "loss": 0.4126, "step": 212 }, { "epoch": 0.06, "grad_norm": 4.929264734380186, "learning_rate": 6.454545454545456e-06, "loss": 0.3385, "step": 213 }, { "epoch": 0.06, "grad_norm": 5.659585273092885, "learning_rate": 6.484848484848485e-06, "loss": 0.3543, "step": 214 }, { "epoch": 0.06, "grad_norm": 4.592493126489269, "learning_rate": 6.515151515151516e-06, "loss": 0.3184, "step": 215 }, { "epoch": 0.06, "grad_norm": 5.260498402630153, "learning_rate": 6.545454545454546e-06, "loss": 0.4262, "step": 216 }, { "epoch": 0.06, "grad_norm": 4.420814786187238, "learning_rate": 6.575757575757577e-06, "loss": 0.3014, "step": 217 }, { "epoch": 0.06, "grad_norm": 5.408942350340549, "learning_rate": 6.606060606060607e-06, "loss": 0.4314, "step": 218 }, { "epoch": 0.06, "grad_norm": 4.73209243587902, "learning_rate": 6.6363636363636375e-06, "loss": 0.3573, "step": 219 }, { "epoch": 0.06, "grad_norm": 4.7639149056367724, "learning_rate": 6.666666666666667e-06, "loss": 0.3731, "step": 220 }, { "epoch": 0.06, "grad_norm": 4.74487748529022, "learning_rate": 6.6969696969696975e-06, "loss": 0.3848, "step": 221 }, { "epoch": 0.06, "grad_norm": 4.912156430473806, "learning_rate": 6.7272727272727275e-06, "loss": 0.3922, "step": 222 }, { "epoch": 0.06, "grad_norm": 4.490592197956874, "learning_rate": 6.757575757575758e-06, "loss": 0.3785, "step": 223 }, { "epoch": 0.06, "grad_norm": 4.8134193025728775, "learning_rate": 6.787878787878789e-06, "loss": 0.4372, "step": 224 }, { "epoch": 0.06, "grad_norm": 4.899403009837595, "learning_rate": 6.818181818181818e-06, "loss": 0.3728, "step": 225 }, { "epoch": 0.06, "grad_norm": 5.162684147179018, "learning_rate": 6.848484848484849e-06, "loss": 0.3692, "step": 226 }, { "epoch": 0.06, "grad_norm": 4.405595687845643, "learning_rate": 6.878787878787879e-06, "loss": 0.3572, "step": 227 }, { "epoch": 0.06, "grad_norm": 4.756466609168483, "learning_rate": 6.90909090909091e-06, "loss": 0.3668, "step": 228 }, { "epoch": 0.06, "grad_norm": 4.583332237764576, "learning_rate": 6.93939393939394e-06, "loss": 0.325, "step": 229 }, { "epoch": 0.06, "grad_norm": 4.454990683408617, "learning_rate": 6.969696969696971e-06, "loss": 0.3558, "step": 230 }, { "epoch": 0.06, "grad_norm": 4.627618101422334, "learning_rate": 7e-06, "loss": 0.3468, "step": 231 }, { "epoch": 0.06, "grad_norm": 4.6688383247015945, "learning_rate": 7.030303030303031e-06, "loss": 0.3307, "step": 232 }, { "epoch": 0.06, "grad_norm": 4.744443819557828, "learning_rate": 7.060606060606061e-06, "loss": 0.4343, "step": 233 }, { "epoch": 0.06, "grad_norm": 4.475080534609491, "learning_rate": 7.0909090909090916e-06, "loss": 0.3784, "step": 234 }, { "epoch": 0.06, "grad_norm": 5.089697275448076, "learning_rate": 7.121212121212122e-06, "loss": 0.3448, "step": 235 }, { "epoch": 0.06, "grad_norm": 4.684068430012713, "learning_rate": 7.151515151515152e-06, "loss": 0.3515, "step": 236 }, { "epoch": 0.06, "grad_norm": 5.1266847779895715, "learning_rate": 7.181818181818182e-06, "loss": 0.4246, "step": 237 }, { "epoch": 0.06, "grad_norm": 4.640950040755782, "learning_rate": 7.212121212121212e-06, "loss": 0.3386, "step": 238 }, { "epoch": 0.07, "grad_norm": 4.3941029283087545, "learning_rate": 7.242424242424243e-06, "loss": 0.3164, "step": 239 }, { "epoch": 0.07, "grad_norm": 4.586561657989462, "learning_rate": 7.272727272727273e-06, "loss": 0.353, "step": 240 }, { "epoch": 0.07, "grad_norm": 4.455869785834386, "learning_rate": 7.303030303030304e-06, "loss": 0.3816, "step": 241 }, { "epoch": 0.07, "grad_norm": 4.624605507469806, "learning_rate": 7.333333333333333e-06, "loss": 0.3219, "step": 242 }, { "epoch": 0.07, "grad_norm": 4.668622094803916, "learning_rate": 7.363636363636364e-06, "loss": 0.37, "step": 243 }, { "epoch": 0.07, "grad_norm": 5.0291412278716905, "learning_rate": 7.393939393939395e-06, "loss": 0.423, "step": 244 }, { "epoch": 0.07, "grad_norm": 4.399242096008631, "learning_rate": 7.424242424242425e-06, "loss": 0.3091, "step": 245 }, { "epoch": 0.07, "grad_norm": 4.387874066312647, "learning_rate": 7.454545454545456e-06, "loss": 0.3559, "step": 246 }, { "epoch": 0.07, "grad_norm": 4.3071504268861815, "learning_rate": 7.484848484848486e-06, "loss": 0.3308, "step": 247 }, { "epoch": 0.07, "grad_norm": 4.704741476990014, "learning_rate": 7.515151515151516e-06, "loss": 0.364, "step": 248 }, { "epoch": 0.07, "grad_norm": 4.656372955262324, "learning_rate": 7.545454545454546e-06, "loss": 0.364, "step": 249 }, { "epoch": 0.07, "grad_norm": 4.84504276054378, "learning_rate": 7.5757575757575764e-06, "loss": 0.3884, "step": 250 }, { "epoch": 0.07, "grad_norm": 4.595365561220155, "learning_rate": 7.606060606060606e-06, "loss": 0.3817, "step": 251 }, { "epoch": 0.07, "grad_norm": 4.832724610279132, "learning_rate": 7.636363636363638e-06, "loss": 0.3586, "step": 252 }, { "epoch": 0.07, "grad_norm": 5.105327183133213, "learning_rate": 7.666666666666667e-06, "loss": 0.3436, "step": 253 }, { "epoch": 0.07, "grad_norm": 4.6333247611294315, "learning_rate": 7.696969696969696e-06, "loss": 0.3776, "step": 254 }, { "epoch": 0.07, "grad_norm": 5.8923194405407715, "learning_rate": 7.727272727272727e-06, "loss": 0.3456, "step": 255 }, { "epoch": 0.07, "grad_norm": 4.654324255061061, "learning_rate": 7.757575757575758e-06, "loss": 0.3586, "step": 256 }, { "epoch": 0.07, "grad_norm": 4.216031059077568, "learning_rate": 7.787878787878789e-06, "loss": 0.33, "step": 257 }, { "epoch": 0.07, "grad_norm": 5.3421184941495445, "learning_rate": 7.81818181818182e-06, "loss": 0.3639, "step": 258 }, { "epoch": 0.07, "grad_norm": 4.7246835791480635, "learning_rate": 7.848484848484849e-06, "loss": 0.3668, "step": 259 }, { "epoch": 0.07, "grad_norm": 4.216895871590719, "learning_rate": 7.87878787878788e-06, "loss": 0.3334, "step": 260 }, { "epoch": 0.07, "grad_norm": 4.865126708149083, "learning_rate": 7.909090909090909e-06, "loss": 0.418, "step": 261 }, { "epoch": 0.07, "grad_norm": 4.5352477939235, "learning_rate": 7.93939393939394e-06, "loss": 0.3544, "step": 262 }, { "epoch": 0.07, "grad_norm": 5.43017485905463, "learning_rate": 7.96969696969697e-06, "loss": 0.4023, "step": 263 }, { "epoch": 0.07, "grad_norm": 4.361207498380666, "learning_rate": 8.000000000000001e-06, "loss": 0.346, "step": 264 }, { "epoch": 0.07, "grad_norm": 4.2962554393520325, "learning_rate": 8.03030303030303e-06, "loss": 0.3771, "step": 265 }, { "epoch": 0.07, "grad_norm": 4.542847793967784, "learning_rate": 8.060606060606061e-06, "loss": 0.3545, "step": 266 }, { "epoch": 0.07, "grad_norm": 4.754720356123425, "learning_rate": 8.090909090909092e-06, "loss": 0.3295, "step": 267 }, { "epoch": 0.07, "grad_norm": 4.753606387063651, "learning_rate": 8.121212121212121e-06, "loss": 0.3839, "step": 268 }, { "epoch": 0.07, "grad_norm": 4.819196567998008, "learning_rate": 8.151515151515152e-06, "loss": 0.3751, "step": 269 }, { "epoch": 0.07, "grad_norm": 4.270059328658117, "learning_rate": 8.181818181818183e-06, "loss": 0.3237, "step": 270 }, { "epoch": 0.07, "grad_norm": 4.478757758383395, "learning_rate": 8.212121212121212e-06, "loss": 0.3253, "step": 271 }, { "epoch": 0.07, "grad_norm": 4.855333213634504, "learning_rate": 8.242424242424243e-06, "loss": 0.4513, "step": 272 }, { "epoch": 0.07, "grad_norm": 4.496352026923094, "learning_rate": 8.272727272727274e-06, "loss": 0.3842, "step": 273 }, { "epoch": 0.07, "grad_norm": 4.1830239779035745, "learning_rate": 8.303030303030305e-06, "loss": 0.3465, "step": 274 }, { "epoch": 0.08, "grad_norm": 4.506639392867929, "learning_rate": 8.333333333333334e-06, "loss": 0.3297, "step": 275 }, { "epoch": 0.08, "grad_norm": 4.718371369676239, "learning_rate": 8.363636363636365e-06, "loss": 0.4098, "step": 276 }, { "epoch": 0.08, "grad_norm": 4.80973464900621, "learning_rate": 8.393939393939394e-06, "loss": 0.3611, "step": 277 }, { "epoch": 0.08, "grad_norm": 5.029258451064713, "learning_rate": 8.424242424242425e-06, "loss": 0.3592, "step": 278 }, { "epoch": 0.08, "grad_norm": 4.589949492587676, "learning_rate": 8.454545454545455e-06, "loss": 0.3478, "step": 279 }, { "epoch": 0.08, "grad_norm": 4.547167896276771, "learning_rate": 8.484848484848486e-06, "loss": 0.3337, "step": 280 }, { "epoch": 0.08, "grad_norm": 4.585228209009267, "learning_rate": 8.515151515151517e-06, "loss": 0.3806, "step": 281 }, { "epoch": 0.08, "grad_norm": 4.4117849020954765, "learning_rate": 8.545454545454546e-06, "loss": 0.3307, "step": 282 }, { "epoch": 0.08, "grad_norm": 5.063490479238941, "learning_rate": 8.575757575757575e-06, "loss": 0.3744, "step": 283 }, { "epoch": 0.08, "grad_norm": 4.714007266776934, "learning_rate": 8.606060606060606e-06, "loss": 0.3748, "step": 284 }, { "epoch": 0.08, "grad_norm": 4.568036150462816, "learning_rate": 8.636363636363637e-06, "loss": 0.3623, "step": 285 }, { "epoch": 0.08, "grad_norm": 4.416935840344904, "learning_rate": 8.666666666666668e-06, "loss": 0.3391, "step": 286 }, { "epoch": 0.08, "grad_norm": 4.214651700781016, "learning_rate": 8.696969696969699e-06, "loss": 0.3329, "step": 287 }, { "epoch": 0.08, "grad_norm": 4.5773373099013455, "learning_rate": 8.727272727272728e-06, "loss": 0.3795, "step": 288 }, { "epoch": 0.08, "grad_norm": 4.34543023429054, "learning_rate": 8.757575757575759e-06, "loss": 0.3605, "step": 289 }, { "epoch": 0.08, "grad_norm": 4.675123049567969, "learning_rate": 8.787878787878788e-06, "loss": 0.431, "step": 290 }, { "epoch": 0.08, "grad_norm": 4.560810561829422, "learning_rate": 8.818181818181819e-06, "loss": 0.3418, "step": 291 }, { "epoch": 0.08, "grad_norm": 4.415951836280444, "learning_rate": 8.84848484848485e-06, "loss": 0.3237, "step": 292 }, { "epoch": 0.08, "grad_norm": 4.195433119296457, "learning_rate": 8.87878787878788e-06, "loss": 0.3285, "step": 293 }, { "epoch": 0.08, "grad_norm": 4.347089817177498, "learning_rate": 8.90909090909091e-06, "loss": 0.3367, "step": 294 }, { "epoch": 0.08, "grad_norm": 4.896486143791702, "learning_rate": 8.93939393939394e-06, "loss": 0.3859, "step": 295 }, { "epoch": 0.08, "grad_norm": 4.643803373471152, "learning_rate": 8.969696969696971e-06, "loss": 0.3505, "step": 296 }, { "epoch": 0.08, "grad_norm": 4.519047240443195, "learning_rate": 9e-06, "loss": 0.3372, "step": 297 }, { "epoch": 0.08, "grad_norm": 4.6753265490756535, "learning_rate": 9.030303030303031e-06, "loss": 0.3673, "step": 298 }, { "epoch": 0.08, "grad_norm": 4.211958443926448, "learning_rate": 9.06060606060606e-06, "loss": 0.3378, "step": 299 }, { "epoch": 0.08, "grad_norm": 4.469853288176385, "learning_rate": 9.090909090909091e-06, "loss": 0.3728, "step": 300 }, { "epoch": 0.08, "grad_norm": 4.56166594783095, "learning_rate": 9.121212121212122e-06, "loss": 0.3689, "step": 301 }, { "epoch": 0.08, "grad_norm": 4.219168289352615, "learning_rate": 9.151515151515153e-06, "loss": 0.3646, "step": 302 }, { "epoch": 0.08, "grad_norm": 4.241187922680894, "learning_rate": 9.181818181818184e-06, "loss": 0.3405, "step": 303 }, { "epoch": 0.08, "grad_norm": 4.458178869933374, "learning_rate": 9.212121212121213e-06, "loss": 0.3507, "step": 304 }, { "epoch": 0.08, "grad_norm": 4.29934909908872, "learning_rate": 9.242424242424244e-06, "loss": 0.3287, "step": 305 }, { "epoch": 0.08, "grad_norm": 4.923389714856165, "learning_rate": 9.272727272727273e-06, "loss": 0.4204, "step": 306 }, { "epoch": 0.08, "grad_norm": 4.47561834607557, "learning_rate": 9.303030303030303e-06, "loss": 0.3552, "step": 307 }, { "epoch": 0.08, "grad_norm": 4.438159776380354, "learning_rate": 9.333333333333334e-06, "loss": 0.3242, "step": 308 }, { "epoch": 0.08, "grad_norm": 4.080568527786985, "learning_rate": 9.363636363636365e-06, "loss": 0.3344, "step": 309 }, { "epoch": 0.08, "grad_norm": 4.295039432974613, "learning_rate": 9.393939393939396e-06, "loss": 0.3392, "step": 310 }, { "epoch": 0.08, "grad_norm": 4.2027332270961715, "learning_rate": 9.424242424242425e-06, "loss": 0.3182, "step": 311 }, { "epoch": 0.09, "grad_norm": 4.289971974457048, "learning_rate": 9.454545454545456e-06, "loss": 0.3451, "step": 312 }, { "epoch": 0.09, "grad_norm": 4.614472867798603, "learning_rate": 9.484848484848485e-06, "loss": 0.3813, "step": 313 }, { "epoch": 0.09, "grad_norm": 4.055910292621515, "learning_rate": 9.515151515151516e-06, "loss": 0.3189, "step": 314 }, { "epoch": 0.09, "grad_norm": 4.245295970012739, "learning_rate": 9.545454545454547e-06, "loss": 0.3289, "step": 315 }, { "epoch": 0.09, "grad_norm": 4.465327940507587, "learning_rate": 9.575757575757576e-06, "loss": 0.3532, "step": 316 }, { "epoch": 0.09, "grad_norm": 4.564700676924458, "learning_rate": 9.606060606060607e-06, "loss": 0.3611, "step": 317 }, { "epoch": 0.09, "grad_norm": 4.512417904019265, "learning_rate": 9.636363636363638e-06, "loss": 0.3892, "step": 318 }, { "epoch": 0.09, "grad_norm": 4.183674694569067, "learning_rate": 9.666666666666667e-06, "loss": 0.3209, "step": 319 }, { "epoch": 0.09, "grad_norm": 4.101289997697547, "learning_rate": 9.696969696969698e-06, "loss": 0.3422, "step": 320 }, { "epoch": 0.09, "grad_norm": 4.310839907356913, "learning_rate": 9.727272727272728e-06, "loss": 0.3293, "step": 321 }, { "epoch": 0.09, "grad_norm": 4.9054376600635585, "learning_rate": 9.757575757575758e-06, "loss": 0.3323, "step": 322 }, { "epoch": 0.09, "grad_norm": 3.888021079794081, "learning_rate": 9.787878787878788e-06, "loss": 0.3044, "step": 323 }, { "epoch": 0.09, "grad_norm": 4.273469837470886, "learning_rate": 9.81818181818182e-06, "loss": 0.3517, "step": 324 }, { "epoch": 0.09, "grad_norm": 3.7144185914422505, "learning_rate": 9.84848484848485e-06, "loss": 0.3049, "step": 325 }, { "epoch": 0.09, "grad_norm": 4.603880691659557, "learning_rate": 9.87878787878788e-06, "loss": 0.4109, "step": 326 }, { "epoch": 0.09, "grad_norm": 4.334268112001053, "learning_rate": 9.90909090909091e-06, "loss": 0.3464, "step": 327 }, { "epoch": 0.09, "grad_norm": 4.341108772132298, "learning_rate": 9.939393939393939e-06, "loss": 0.3451, "step": 328 }, { "epoch": 0.09, "grad_norm": 4.160707272996998, "learning_rate": 9.96969696969697e-06, "loss": 0.3359, "step": 329 }, { "epoch": 0.09, "grad_norm": 4.673438573112379, "learning_rate": 1e-05, "loss": 0.4091, "step": 330 }, { "epoch": 0.09, "grad_norm": 5.4089667880024255, "learning_rate": 9.999999782826503e-06, "loss": 0.3628, "step": 331 }, { "epoch": 0.09, "grad_norm": 3.9446848867881914, "learning_rate": 9.999999131306029e-06, "loss": 0.3335, "step": 332 }, { "epoch": 0.09, "grad_norm": 4.435323063815749, "learning_rate": 9.999998045438632e-06, "loss": 0.3643, "step": 333 }, { "epoch": 0.09, "grad_norm": 4.320827685063797, "learning_rate": 9.999996525224412e-06, "loss": 0.3329, "step": 334 }, { "epoch": 0.09, "grad_norm": 4.36265608974536, "learning_rate": 9.999994570663496e-06, "loss": 0.3461, "step": 335 }, { "epoch": 0.09, "grad_norm": 4.2592983722217, "learning_rate": 9.999992181756056e-06, "loss": 0.3766, "step": 336 }, { "epoch": 0.09, "grad_norm": 4.79646378296195, "learning_rate": 9.999989358502298e-06, "loss": 0.3659, "step": 337 }, { "epoch": 0.09, "grad_norm": 4.400343051257205, "learning_rate": 9.99998610090247e-06, "loss": 0.3507, "step": 338 }, { "epoch": 0.09, "grad_norm": 4.827448978040931, "learning_rate": 9.999982408956851e-06, "loss": 0.3366, "step": 339 }, { "epoch": 0.09, "grad_norm": 4.4231733343936055, "learning_rate": 9.999978282665768e-06, "loss": 0.346, "step": 340 }, { "epoch": 0.09, "grad_norm": 4.370154878114904, "learning_rate": 9.999973722029575e-06, "loss": 0.4103, "step": 341 }, { "epoch": 0.09, "grad_norm": 4.20966197134485, "learning_rate": 9.999968727048667e-06, "loss": 0.3531, "step": 342 }, { "epoch": 0.09, "grad_norm": 4.448450851329632, "learning_rate": 9.999963297723481e-06, "loss": 0.3629, "step": 343 }, { "epoch": 0.09, "grad_norm": 4.6259560351897155, "learning_rate": 9.999957434054487e-06, "loss": 0.3382, "step": 344 }, { "epoch": 0.09, "grad_norm": 4.547978010284929, "learning_rate": 9.999951136042194e-06, "loss": 0.4058, "step": 345 }, { "epoch": 0.09, "grad_norm": 4.142281305157911, "learning_rate": 9.99994440368715e-06, "loss": 0.3545, "step": 346 }, { "epoch": 0.09, "grad_norm": 4.740369583978333, "learning_rate": 9.99993723698994e-06, "loss": 0.3753, "step": 347 }, { "epoch": 0.1, "grad_norm": 4.343321713377683, "learning_rate": 9.999929635951186e-06, "loss": 0.3887, "step": 348 }, { "epoch": 0.1, "grad_norm": 4.2010583750758235, "learning_rate": 9.99992160057155e-06, "loss": 0.3212, "step": 349 }, { "epoch": 0.1, "grad_norm": 4.049524747453005, "learning_rate": 9.999913130851726e-06, "loss": 0.3324, "step": 350 }, { "epoch": 0.1, "grad_norm": 4.050123203528034, "learning_rate": 9.999904226792453e-06, "loss": 0.3308, "step": 351 }, { "epoch": 0.1, "grad_norm": 4.009035365811596, "learning_rate": 9.999894888394505e-06, "loss": 0.3417, "step": 352 }, { "epoch": 0.1, "grad_norm": 4.294750340037094, "learning_rate": 9.999885115658693e-06, "loss": 0.3578, "step": 353 }, { "epoch": 0.1, "grad_norm": 4.77967792592922, "learning_rate": 9.999874908585864e-06, "loss": 0.4079, "step": 354 }, { "epoch": 0.1, "grad_norm": 4.674857837086743, "learning_rate": 9.999864267176904e-06, "loss": 0.4115, "step": 355 }, { "epoch": 0.1, "grad_norm": 3.8718618109598215, "learning_rate": 9.999853191432741e-06, "loss": 0.3152, "step": 356 }, { "epoch": 0.1, "grad_norm": 4.331597939165435, "learning_rate": 9.999841681354334e-06, "loss": 0.3871, "step": 357 }, { "epoch": 0.1, "grad_norm": 4.277256615205333, "learning_rate": 9.999829736942686e-06, "loss": 0.3239, "step": 358 }, { "epoch": 0.1, "grad_norm": 4.149331062618436, "learning_rate": 9.999817358198831e-06, "loss": 0.3668, "step": 359 }, { "epoch": 0.1, "grad_norm": 4.315716480649614, "learning_rate": 9.999804545123847e-06, "loss": 0.3782, "step": 360 }, { "epoch": 0.1, "grad_norm": 4.164901665057027, "learning_rate": 9.999791297718844e-06, "loss": 0.3532, "step": 361 }, { "epoch": 0.1, "grad_norm": 4.7676989097225055, "learning_rate": 9.999777615984978e-06, "loss": 0.3412, "step": 362 }, { "epoch": 0.1, "grad_norm": 4.336317032183553, "learning_rate": 9.999763499923432e-06, "loss": 0.3552, "step": 363 }, { "epoch": 0.1, "grad_norm": 4.334603725462001, "learning_rate": 9.999748949535436e-06, "loss": 0.3554, "step": 364 }, { "epoch": 0.1, "grad_norm": 4.131537987396616, "learning_rate": 9.999733964822252e-06, "loss": 0.3322, "step": 365 }, { "epoch": 0.1, "grad_norm": 4.513789284576847, "learning_rate": 9.999718545785183e-06, "loss": 0.3425, "step": 366 }, { "epoch": 0.1, "grad_norm": 4.170596768573397, "learning_rate": 9.999702692425567e-06, "loss": 0.3318, "step": 367 }, { "epoch": 0.1, "grad_norm": 4.391527149872584, "learning_rate": 9.999686404744782e-06, "loss": 0.3528, "step": 368 }, { "epoch": 0.1, "grad_norm": 4.025111572088874, "learning_rate": 9.999669682744245e-06, "loss": 0.3607, "step": 369 }, { "epoch": 0.1, "grad_norm": 11.515811782591134, "learning_rate": 9.999652526425404e-06, "loss": 0.3738, "step": 370 }, { "epoch": 0.1, "grad_norm": 4.376679293359408, "learning_rate": 9.999634935789753e-06, "loss": 0.3696, "step": 371 }, { "epoch": 0.1, "grad_norm": 4.669377622956597, "learning_rate": 9.999616910838818e-06, "loss": 0.3421, "step": 372 }, { "epoch": 0.1, "grad_norm": 3.9162994981984234, "learning_rate": 9.999598451574167e-06, "loss": 0.364, "step": 373 }, { "epoch": 0.1, "grad_norm": 4.517952400999471, "learning_rate": 9.999579557997402e-06, "loss": 0.3449, "step": 374 }, { "epoch": 0.1, "grad_norm": 4.467483484309139, "learning_rate": 9.999560230110165e-06, "loss": 0.3194, "step": 375 }, { "epoch": 0.1, "grad_norm": 4.6303457867490785, "learning_rate": 9.999540467914133e-06, "loss": 0.3819, "step": 376 }, { "epoch": 0.1, "grad_norm": 4.3445923038232035, "learning_rate": 9.999520271411026e-06, "loss": 0.3334, "step": 377 }, { "epoch": 0.1, "grad_norm": 4.1673411257928565, "learning_rate": 9.999499640602597e-06, "loss": 0.3084, "step": 378 }, { "epoch": 0.1, "grad_norm": 3.9273173382754276, "learning_rate": 9.999478575490637e-06, "loss": 0.3342, "step": 379 }, { "epoch": 0.1, "grad_norm": 5.07461273599012, "learning_rate": 9.999457076076978e-06, "loss": 0.3015, "step": 380 }, { "epoch": 0.1, "grad_norm": 4.594858339869631, "learning_rate": 9.999435142363484e-06, "loss": 0.3753, "step": 381 }, { "epoch": 0.1, "grad_norm": 4.10065742206399, "learning_rate": 9.999412774352064e-06, "loss": 0.3432, "step": 382 }, { "epoch": 0.1, "grad_norm": 4.210509226933416, "learning_rate": 9.99938997204466e-06, "loss": 0.3507, "step": 383 }, { "epoch": 0.1, "grad_norm": 3.9802053693025004, "learning_rate": 9.999366735443255e-06, "loss": 0.3413, "step": 384 }, { "epoch": 0.11, "grad_norm": 3.809766404833952, "learning_rate": 9.999343064549862e-06, "loss": 0.3257, "step": 385 }, { "epoch": 0.11, "grad_norm": 4.008566832373406, "learning_rate": 9.999318959366543e-06, "loss": 0.335, "step": 386 }, { "epoch": 0.11, "grad_norm": 4.539104446749643, "learning_rate": 9.999294419895389e-06, "loss": 0.3836, "step": 387 }, { "epoch": 0.11, "grad_norm": 5.30428666363456, "learning_rate": 9.999269446138533e-06, "loss": 0.3678, "step": 388 }, { "epoch": 0.11, "grad_norm": 3.9484798240843726, "learning_rate": 9.999244038098144e-06, "loss": 0.3215, "step": 389 }, { "epoch": 0.11, "grad_norm": 3.886010977410963, "learning_rate": 9.999218195776428e-06, "loss": 0.3347, "step": 390 }, { "epoch": 0.11, "grad_norm": 4.361829511204051, "learning_rate": 9.99919191917563e-06, "loss": 0.3647, "step": 391 }, { "epoch": 0.11, "grad_norm": 4.04963150428601, "learning_rate": 9.999165208298034e-06, "loss": 0.333, "step": 392 }, { "epoch": 0.11, "grad_norm": 3.979505051692121, "learning_rate": 9.999138063145962e-06, "loss": 0.2983, "step": 393 }, { "epoch": 0.11, "grad_norm": 3.7829758596070415, "learning_rate": 9.999110483721767e-06, "loss": 0.3342, "step": 394 }, { "epoch": 0.11, "grad_norm": 4.145699243065273, "learning_rate": 9.99908247002785e-06, "loss": 0.313, "step": 395 }, { "epoch": 0.11, "grad_norm": 4.00672408957499, "learning_rate": 9.999054022066643e-06, "loss": 0.3357, "step": 396 }, { "epoch": 0.11, "grad_norm": 3.7406060827271075, "learning_rate": 9.999025139840615e-06, "loss": 0.3206, "step": 397 }, { "epoch": 0.11, "grad_norm": 4.585124069819002, "learning_rate": 9.998995823352276e-06, "loss": 0.4021, "step": 398 }, { "epoch": 0.11, "grad_norm": 4.209054176443017, "learning_rate": 9.998966072604175e-06, "loss": 0.3423, "step": 399 }, { "epoch": 0.11, "grad_norm": 4.12076488626069, "learning_rate": 9.998935887598894e-06, "loss": 0.36, "step": 400 }, { "epoch": 0.11, "grad_norm": 4.101877867787852, "learning_rate": 9.998905268339056e-06, "loss": 0.3212, "step": 401 }, { "epoch": 0.11, "grad_norm": 3.8115206401654493, "learning_rate": 9.99887421482732e-06, "loss": 0.2913, "step": 402 }, { "epoch": 0.11, "grad_norm": 4.26716023870372, "learning_rate": 9.998842727066385e-06, "loss": 0.3433, "step": 403 }, { "epoch": 0.11, "grad_norm": 4.186709441045844, "learning_rate": 9.998810805058986e-06, "loss": 0.335, "step": 404 }, { "epoch": 0.11, "grad_norm": 3.815787007607476, "learning_rate": 9.998778448807895e-06, "loss": 0.3196, "step": 405 }, { "epoch": 0.11, "grad_norm": 4.0619966376546826, "learning_rate": 9.998745658315924e-06, "loss": 0.2965, "step": 406 }, { "epoch": 0.11, "grad_norm": 4.021023049120253, "learning_rate": 9.998712433585919e-06, "loss": 0.3255, "step": 407 }, { "epoch": 0.11, "grad_norm": 4.195956615305363, "learning_rate": 9.998678774620771e-06, "loss": 0.3573, "step": 408 }, { "epoch": 0.11, "grad_norm": 4.049007998677101, "learning_rate": 9.9986446814234e-06, "loss": 0.3219, "step": 409 }, { "epoch": 0.11, "grad_norm": 4.123368548044139, "learning_rate": 9.998610153996768e-06, "loss": 0.3692, "step": 410 }, { "epoch": 0.11, "grad_norm": 3.9411023784619568, "learning_rate": 9.998575192343877e-06, "loss": 0.3117, "step": 411 }, { "epoch": 0.11, "grad_norm": 4.212240106003559, "learning_rate": 9.998539796467761e-06, "loss": 0.3124, "step": 412 }, { "epoch": 0.11, "grad_norm": 3.9840644647494647, "learning_rate": 9.998503966371496e-06, "loss": 0.3674, "step": 413 }, { "epoch": 0.11, "grad_norm": 4.1834298094248235, "learning_rate": 9.998467702058194e-06, "loss": 0.3571, "step": 414 }, { "epoch": 0.11, "grad_norm": 4.165577776843734, "learning_rate": 9.998431003531008e-06, "loss": 0.3215, "step": 415 }, { "epoch": 0.11, "grad_norm": 3.6707088750477324, "learning_rate": 9.99839387079312e-06, "loss": 0.3326, "step": 416 }, { "epoch": 0.11, "grad_norm": 3.6256633108539034, "learning_rate": 9.998356303847764e-06, "loss": 0.3201, "step": 417 }, { "epoch": 0.11, "grad_norm": 4.604686435985164, "learning_rate": 9.998318302698198e-06, "loss": 0.3844, "step": 418 }, { "epoch": 0.11, "grad_norm": 4.014397880699301, "learning_rate": 9.998279867347723e-06, "loss": 0.3235, "step": 419 }, { "epoch": 0.11, "grad_norm": 4.194092429654693, "learning_rate": 9.998240997799677e-06, "loss": 0.3497, "step": 420 }, { "epoch": 0.11, "grad_norm": 3.8496118027336963, "learning_rate": 9.998201694057441e-06, "loss": 0.3697, "step": 421 }, { "epoch": 0.12, "grad_norm": 3.881436284637087, "learning_rate": 9.998161956124428e-06, "loss": 0.3712, "step": 422 }, { "epoch": 0.12, "grad_norm": 4.046258792596131, "learning_rate": 9.998121784004086e-06, "loss": 0.4064, "step": 423 }, { "epoch": 0.12, "grad_norm": 4.165185372475956, "learning_rate": 9.998081177699909e-06, "loss": 0.3411, "step": 424 }, { "epoch": 0.12, "grad_norm": 3.8882062893973273, "learning_rate": 9.998040137215423e-06, "loss": 0.2749, "step": 425 }, { "epoch": 0.12, "grad_norm": 4.1282734757014845, "learning_rate": 9.997998662554194e-06, "loss": 0.352, "step": 426 }, { "epoch": 0.12, "grad_norm": 3.8967391743955373, "learning_rate": 9.997956753719821e-06, "loss": 0.3202, "step": 427 }, { "epoch": 0.12, "grad_norm": 4.435224973732747, "learning_rate": 9.99791441071595e-06, "loss": 0.3269, "step": 428 }, { "epoch": 0.12, "grad_norm": 3.9480633469602116, "learning_rate": 9.997871633546257e-06, "loss": 0.3644, "step": 429 }, { "epoch": 0.12, "grad_norm": 3.9663656432995764, "learning_rate": 9.997828422214458e-06, "loss": 0.3232, "step": 430 }, { "epoch": 0.12, "grad_norm": 3.8201026443227786, "learning_rate": 9.997784776724306e-06, "loss": 0.317, "step": 431 }, { "epoch": 0.12, "grad_norm": 3.9195793023804253, "learning_rate": 9.997740697079595e-06, "loss": 0.3004, "step": 432 }, { "epoch": 0.12, "grad_norm": 4.480878096091991, "learning_rate": 9.997696183284148e-06, "loss": 0.3938, "step": 433 }, { "epoch": 0.12, "grad_norm": 3.675436703113189, "learning_rate": 9.997651235341842e-06, "loss": 0.3086, "step": 434 }, { "epoch": 0.12, "grad_norm": 3.7856809338775257, "learning_rate": 9.997605853256572e-06, "loss": 0.3335, "step": 435 }, { "epoch": 0.12, "grad_norm": 3.5992673548883913, "learning_rate": 9.997560037032283e-06, "loss": 0.2924, "step": 436 }, { "epoch": 0.12, "grad_norm": 4.189646847760906, "learning_rate": 9.997513786672959e-06, "loss": 0.3634, "step": 437 }, { "epoch": 0.12, "grad_norm": 3.768981956623533, "learning_rate": 9.997467102182614e-06, "loss": 0.3204, "step": 438 }, { "epoch": 0.12, "grad_norm": 3.725180570879781, "learning_rate": 9.997419983565304e-06, "loss": 0.3317, "step": 439 }, { "epoch": 0.12, "grad_norm": 3.8185595584113607, "learning_rate": 9.997372430825125e-06, "loss": 0.3579, "step": 440 }, { "epoch": 0.12, "grad_norm": 3.7944729920242954, "learning_rate": 9.997324443966203e-06, "loss": 0.3194, "step": 441 }, { "epoch": 0.12, "grad_norm": 3.9814502632301445, "learning_rate": 9.997276022992709e-06, "loss": 0.3642, "step": 442 }, { "epoch": 0.12, "grad_norm": 3.759728753298408, "learning_rate": 9.997227167908849e-06, "loss": 0.3272, "step": 443 }, { "epoch": 0.12, "grad_norm": 3.76763352101135, "learning_rate": 9.99717787871887e-06, "loss": 0.3346, "step": 444 }, { "epoch": 0.12, "grad_norm": 3.5291936249395457, "learning_rate": 9.997128155427047e-06, "loss": 0.3079, "step": 445 }, { "epoch": 0.12, "grad_norm": 3.971559126992198, "learning_rate": 9.997077998037707e-06, "loss": 0.3383, "step": 446 }, { "epoch": 0.12, "grad_norm": 3.7248181192929812, "learning_rate": 9.997027406555201e-06, "loss": 0.3018, "step": 447 }, { "epoch": 0.12, "grad_norm": 3.5357343304449085, "learning_rate": 9.996976380983927e-06, "loss": 0.3082, "step": 448 }, { "epoch": 0.12, "grad_norm": 4.538354569891425, "learning_rate": 9.99692492132832e-06, "loss": 0.3561, "step": 449 }, { "epoch": 0.12, "grad_norm": 3.6579649325136776, "learning_rate": 9.996873027592844e-06, "loss": 0.3217, "step": 450 }, { "epoch": 0.12, "grad_norm": 3.6764037888712315, "learning_rate": 9.99682069978201e-06, "loss": 0.3061, "step": 451 }, { "epoch": 0.12, "grad_norm": 3.727210693944586, "learning_rate": 9.996767937900367e-06, "loss": 0.3459, "step": 452 }, { "epoch": 0.12, "grad_norm": 3.610670266073993, "learning_rate": 9.996714741952493e-06, "loss": 0.3041, "step": 453 }, { "epoch": 0.12, "grad_norm": 3.6911017688401393, "learning_rate": 9.99666111194301e-06, "loss": 0.3275, "step": 454 }, { "epoch": 0.12, "grad_norm": 3.7116898131973532, "learning_rate": 9.996607047876582e-06, "loss": 0.3161, "step": 455 }, { "epoch": 0.12, "grad_norm": 4.051156243428742, "learning_rate": 9.9965525497579e-06, "loss": 0.3463, "step": 456 }, { "epoch": 0.12, "grad_norm": 3.8719930210911286, "learning_rate": 9.9964976175917e-06, "loss": 0.3241, "step": 457 }, { "epoch": 0.13, "grad_norm": 3.8748489302929148, "learning_rate": 9.996442251382754e-06, "loss": 0.3513, "step": 458 }, { "epoch": 0.13, "grad_norm": 3.533801535678403, "learning_rate": 9.996386451135871e-06, "loss": 0.3055, "step": 459 }, { "epoch": 0.13, "grad_norm": 4.100961801715447, "learning_rate": 9.996330216855901e-06, "loss": 0.358, "step": 460 }, { "epoch": 0.13, "grad_norm": 3.6075421336937636, "learning_rate": 9.996273548547724e-06, "loss": 0.3037, "step": 461 }, { "epoch": 0.13, "grad_norm": 3.521861829096483, "learning_rate": 9.996216446216267e-06, "loss": 0.3395, "step": 462 }, { "epoch": 0.13, "grad_norm": 3.9016695749723147, "learning_rate": 9.99615890986649e-06, "loss": 0.3351, "step": 463 }, { "epoch": 0.13, "grad_norm": 3.835776071517302, "learning_rate": 9.996100939503387e-06, "loss": 0.3165, "step": 464 }, { "epoch": 0.13, "grad_norm": 3.427218450275447, "learning_rate": 9.996042535132001e-06, "loss": 0.3049, "step": 465 }, { "epoch": 0.13, "grad_norm": 3.9211356318146557, "learning_rate": 9.9959836967574e-06, "loss": 0.279, "step": 466 }, { "epoch": 0.13, "grad_norm": 3.759359701375455, "learning_rate": 9.995924424384697e-06, "loss": 0.3253, "step": 467 }, { "epoch": 0.13, "grad_norm": 7.4364516776188925, "learning_rate": 9.995864718019042e-06, "loss": 0.3325, "step": 468 }, { "epoch": 0.13, "grad_norm": 3.3747990747650687, "learning_rate": 9.995804577665617e-06, "loss": 0.2497, "step": 469 }, { "epoch": 0.13, "grad_norm": 4.00746261820677, "learning_rate": 9.995744003329655e-06, "loss": 0.3352, "step": 470 }, { "epoch": 0.13, "grad_norm": 3.65870207006295, "learning_rate": 9.995682995016409e-06, "loss": 0.3021, "step": 471 }, { "epoch": 0.13, "grad_norm": 4.498382997724767, "learning_rate": 9.995621552731182e-06, "loss": 0.3284, "step": 472 }, { "epoch": 0.13, "grad_norm": 4.553554526451541, "learning_rate": 9.995559676479317e-06, "loss": 0.2751, "step": 473 }, { "epoch": 0.13, "grad_norm": 4.10259191393736, "learning_rate": 9.99549736626618e-06, "loss": 0.3617, "step": 474 }, { "epoch": 0.13, "grad_norm": 4.825525380444603, "learning_rate": 9.995434622097189e-06, "loss": 0.3434, "step": 475 }, { "epoch": 0.13, "grad_norm": 4.062851973559195, "learning_rate": 9.995371443977794e-06, "loss": 0.3673, "step": 476 }, { "epoch": 0.13, "grad_norm": 4.708189013193882, "learning_rate": 9.995307831913483e-06, "loss": 0.3416, "step": 477 }, { "epoch": 0.13, "grad_norm": 4.63564165057117, "learning_rate": 9.995243785909782e-06, "loss": 0.3429, "step": 478 }, { "epoch": 0.13, "grad_norm": 3.6879984610544487, "learning_rate": 9.995179305972256e-06, "loss": 0.32, "step": 479 }, { "epoch": 0.13, "grad_norm": 3.787211983521054, "learning_rate": 9.995114392106502e-06, "loss": 0.3699, "step": 480 }, { "epoch": 0.13, "grad_norm": 4.327141793920121, "learning_rate": 9.995049044318164e-06, "loss": 0.3235, "step": 481 }, { "epoch": 0.13, "grad_norm": 3.833920065352593, "learning_rate": 9.994983262612916e-06, "loss": 0.3613, "step": 482 }, { "epoch": 0.13, "grad_norm": 4.164456973598666, "learning_rate": 9.994917046996472e-06, "loss": 0.3743, "step": 483 }, { "epoch": 0.13, "grad_norm": 3.743020193674409, "learning_rate": 9.994850397474588e-06, "loss": 0.3439, "step": 484 }, { "epoch": 0.13, "grad_norm": 3.515403754484158, "learning_rate": 9.994783314053047e-06, "loss": 0.2701, "step": 485 }, { "epoch": 0.13, "grad_norm": 4.048462614319834, "learning_rate": 9.994715796737683e-06, "loss": 0.3045, "step": 486 }, { "epoch": 0.13, "grad_norm": 4.188690490763364, "learning_rate": 9.994647845534357e-06, "loss": 0.3726, "step": 487 }, { "epoch": 0.13, "grad_norm": 4.307582758326019, "learning_rate": 9.994579460448975e-06, "loss": 0.3283, "step": 488 }, { "epoch": 0.13, "grad_norm": 3.922971038535889, "learning_rate": 9.994510641487477e-06, "loss": 0.3773, "step": 489 }, { "epoch": 0.13, "grad_norm": 3.7130894724742296, "learning_rate": 9.994441388655837e-06, "loss": 0.3614, "step": 490 }, { "epoch": 0.13, "grad_norm": 3.5745668345635844, "learning_rate": 9.994371701960077e-06, "loss": 0.3129, "step": 491 }, { "epoch": 0.13, "grad_norm": 4.06846832131553, "learning_rate": 9.994301581406247e-06, "loss": 0.3356, "step": 492 }, { "epoch": 0.13, "grad_norm": 4.051494997930581, "learning_rate": 9.994231027000439e-06, "loss": 0.3086, "step": 493 }, { "epoch": 0.13, "grad_norm": 3.45797411695485, "learning_rate": 9.994160038748783e-06, "loss": 0.3053, "step": 494 }, { "epoch": 0.14, "grad_norm": 3.8783408338046015, "learning_rate": 9.994088616657445e-06, "loss": 0.3127, "step": 495 }, { "epoch": 0.14, "grad_norm": 3.7568169990750424, "learning_rate": 9.99401676073263e-06, "loss": 0.2881, "step": 496 }, { "epoch": 0.14, "grad_norm": 3.7328241539684694, "learning_rate": 9.993944470980576e-06, "loss": 0.3311, "step": 497 }, { "epoch": 0.14, "grad_norm": 3.8466499584832587, "learning_rate": 9.99387174740757e-06, "loss": 0.3281, "step": 498 }, { "epoch": 0.14, "grad_norm": 3.7481130912508873, "learning_rate": 9.993798590019924e-06, "loss": 0.3589, "step": 499 }, { "epoch": 0.14, "grad_norm": 3.5427552057156704, "learning_rate": 9.993724998823995e-06, "loss": 0.305, "step": 500 }, { "epoch": 0.14, "grad_norm": 3.584783558606137, "learning_rate": 9.993650973826177e-06, "loss": 0.3003, "step": 501 }, { "epoch": 0.14, "grad_norm": 4.312690359942732, "learning_rate": 9.993576515032896e-06, "loss": 0.3202, "step": 502 }, { "epoch": 0.14, "grad_norm": 4.023569302547953, "learning_rate": 9.993501622450626e-06, "loss": 0.3255, "step": 503 }, { "epoch": 0.14, "grad_norm": 3.959083722012039, "learning_rate": 9.99342629608587e-06, "loss": 0.3449, "step": 504 }, { "epoch": 0.14, "grad_norm": 3.6409605833348713, "learning_rate": 9.993350535945172e-06, "loss": 0.3122, "step": 505 }, { "epoch": 0.14, "grad_norm": 3.881365220819395, "learning_rate": 9.993274342035111e-06, "loss": 0.3485, "step": 506 }, { "epoch": 0.14, "grad_norm": 3.875874738456161, "learning_rate": 9.99319771436231e-06, "loss": 0.3314, "step": 507 }, { "epoch": 0.14, "grad_norm": 3.6973392319182214, "learning_rate": 9.993120652933424e-06, "loss": 0.3234, "step": 508 }, { "epoch": 0.14, "grad_norm": 4.3021385034497746, "learning_rate": 9.993043157755145e-06, "loss": 0.3478, "step": 509 }, { "epoch": 0.14, "grad_norm": 3.990629222446292, "learning_rate": 9.992965228834208e-06, "loss": 0.3751, "step": 510 }, { "epoch": 0.14, "grad_norm": 3.790412874579726, "learning_rate": 9.99288686617738e-06, "loss": 0.3689, "step": 511 }, { "epoch": 0.14, "grad_norm": 4.011892983516215, "learning_rate": 9.992808069791472e-06, "loss": 0.3274, "step": 512 }, { "epoch": 0.14, "grad_norm": 3.7001945245196604, "learning_rate": 9.992728839683324e-06, "loss": 0.289, "step": 513 }, { "epoch": 0.14, "grad_norm": 3.9138478787249467, "learning_rate": 9.992649175859822e-06, "loss": 0.3401, "step": 514 }, { "epoch": 0.14, "grad_norm": 4.463325827559996, "learning_rate": 9.992569078327886e-06, "loss": 0.3335, "step": 515 }, { "epoch": 0.14, "grad_norm": 3.7471733764932624, "learning_rate": 9.992488547094474e-06, "loss": 0.3605, "step": 516 }, { "epoch": 0.14, "grad_norm": 3.5089807155299573, "learning_rate": 9.992407582166582e-06, "loss": 0.3054, "step": 517 }, { "epoch": 0.14, "grad_norm": 3.693730588351396, "learning_rate": 9.992326183551242e-06, "loss": 0.2857, "step": 518 }, { "epoch": 0.14, "grad_norm": 3.7079229821251385, "learning_rate": 9.992244351255526e-06, "loss": 0.316, "step": 519 }, { "epoch": 0.14, "grad_norm": 3.67808542959306, "learning_rate": 9.992162085286543e-06, "loss": 0.2683, "step": 520 }, { "epoch": 0.14, "grad_norm": 3.809588995572531, "learning_rate": 9.992079385651439e-06, "loss": 0.3048, "step": 521 }, { "epoch": 0.14, "grad_norm": 3.9332022803075697, "learning_rate": 9.9919962523574e-06, "loss": 0.3053, "step": 522 }, { "epoch": 0.14, "grad_norm": 3.6569290266564947, "learning_rate": 9.991912685411642e-06, "loss": 0.2779, "step": 523 }, { "epoch": 0.14, "grad_norm": 3.9510493670523834, "learning_rate": 9.99182868482143e-06, "loss": 0.3166, "step": 524 }, { "epoch": 0.14, "grad_norm": 4.1862387312553935, "learning_rate": 9.99174425059406e-06, "loss": 0.3299, "step": 525 }, { "epoch": 0.14, "grad_norm": 3.9389947846094957, "learning_rate": 9.991659382736864e-06, "loss": 0.3245, "step": 526 }, { "epoch": 0.14, "grad_norm": 4.101612951270305, "learning_rate": 9.991574081257219e-06, "loss": 0.3047, "step": 527 }, { "epoch": 0.14, "grad_norm": 3.566958804625748, "learning_rate": 9.99148834616253e-06, "loss": 0.3102, "step": 528 }, { "epoch": 0.14, "grad_norm": 3.482376189919131, "learning_rate": 9.99140217746025e-06, "loss": 0.322, "step": 529 }, { "epoch": 0.14, "grad_norm": 3.9368497200134085, "learning_rate": 9.991315575157861e-06, "loss": 0.3134, "step": 530 }, { "epoch": 0.14, "grad_norm": 4.476457092838848, "learning_rate": 9.991228539262886e-06, "loss": 0.3566, "step": 531 }, { "epoch": 0.15, "grad_norm": 4.137508305832424, "learning_rate": 9.991141069782886e-06, "loss": 0.3556, "step": 532 }, { "epoch": 0.15, "grad_norm": 4.766688493619562, "learning_rate": 9.99105316672546e-06, "loss": 0.3219, "step": 533 }, { "epoch": 0.15, "grad_norm": 3.457024072940931, "learning_rate": 9.990964830098246e-06, "loss": 0.2686, "step": 534 }, { "epoch": 0.15, "grad_norm": 3.9299979659182602, "learning_rate": 9.990876059908915e-06, "loss": 0.3281, "step": 535 }, { "epoch": 0.15, "grad_norm": 3.5873933053383884, "learning_rate": 9.990786856165178e-06, "loss": 0.3214, "step": 536 }, { "epoch": 0.15, "grad_norm": 3.8995754008029153, "learning_rate": 9.990697218874788e-06, "loss": 0.3122, "step": 537 }, { "epoch": 0.15, "grad_norm": 4.100429866752299, "learning_rate": 9.990607148045526e-06, "loss": 0.3295, "step": 538 }, { "epoch": 0.15, "grad_norm": 3.746335512814467, "learning_rate": 9.990516643685222e-06, "loss": 0.3116, "step": 539 }, { "epoch": 0.15, "grad_norm": 3.492998895598886, "learning_rate": 9.990425705801733e-06, "loss": 0.3205, "step": 540 }, { "epoch": 0.15, "grad_norm": 3.8407713955138734, "learning_rate": 9.990334334402964e-06, "loss": 0.3363, "step": 541 }, { "epoch": 0.15, "grad_norm": 3.4686474170387442, "learning_rate": 9.990242529496848e-06, "loss": 0.2929, "step": 542 }, { "epoch": 0.15, "grad_norm": 4.067101646488163, "learning_rate": 9.990150291091363e-06, "loss": 0.3225, "step": 543 }, { "epoch": 0.15, "grad_norm": 3.6481539958661005, "learning_rate": 9.990057619194517e-06, "loss": 0.3372, "step": 544 }, { "epoch": 0.15, "grad_norm": 3.5511909125879906, "learning_rate": 9.989964513814368e-06, "loss": 0.2825, "step": 545 }, { "epoch": 0.15, "grad_norm": 3.7857716862895283, "learning_rate": 9.989870974958997e-06, "loss": 0.3556, "step": 546 }, { "epoch": 0.15, "grad_norm": 4.092788081965213, "learning_rate": 9.989777002636533e-06, "loss": 0.3153, "step": 547 }, { "epoch": 0.15, "grad_norm": 4.104019121056975, "learning_rate": 9.989682596855138e-06, "loss": 0.3178, "step": 548 }, { "epoch": 0.15, "grad_norm": 3.912919278414138, "learning_rate": 9.989587757623015e-06, "loss": 0.3099, "step": 549 }, { "epoch": 0.15, "grad_norm": 4.512015459175421, "learning_rate": 9.9894924849484e-06, "loss": 0.3071, "step": 550 }, { "epoch": 0.15, "grad_norm": 3.8098330049714644, "learning_rate": 9.989396778839572e-06, "loss": 0.3322, "step": 551 }, { "epoch": 0.15, "grad_norm": 3.6136184825769444, "learning_rate": 9.989300639304843e-06, "loss": 0.2779, "step": 552 }, { "epoch": 0.15, "grad_norm": 3.411006081757144, "learning_rate": 9.989204066352565e-06, "loss": 0.2884, "step": 553 }, { "epoch": 0.15, "grad_norm": 3.4235555412349874, "learning_rate": 9.989107059991127e-06, "loss": 0.3342, "step": 554 }, { "epoch": 0.15, "grad_norm": 3.4669876679296965, "learning_rate": 9.989009620228957e-06, "loss": 0.2853, "step": 555 }, { "epoch": 0.15, "grad_norm": 3.9654896484091946, "learning_rate": 9.988911747074518e-06, "loss": 0.3398, "step": 556 }, { "epoch": 0.15, "grad_norm": 3.618634090408832, "learning_rate": 9.988813440536314e-06, "loss": 0.3031, "step": 557 }, { "epoch": 0.15, "grad_norm": 4.709794299197405, "learning_rate": 9.988714700622882e-06, "loss": 0.3045, "step": 558 }, { "epoch": 0.15, "grad_norm": 3.6379338775764603, "learning_rate": 9.988615527342801e-06, "loss": 0.3287, "step": 559 }, { "epoch": 0.15, "grad_norm": 3.906541445398935, "learning_rate": 9.988515920704689e-06, "loss": 0.3446, "step": 560 }, { "epoch": 0.15, "grad_norm": 3.802958218449508, "learning_rate": 9.988415880717195e-06, "loss": 0.294, "step": 561 }, { "epoch": 0.15, "grad_norm": 3.5750411292773627, "learning_rate": 9.988315407389009e-06, "loss": 0.284, "step": 562 }, { "epoch": 0.15, "grad_norm": 3.3997428129594223, "learning_rate": 9.988214500728862e-06, "loss": 0.2854, "step": 563 }, { "epoch": 0.15, "grad_norm": 3.852934108845612, "learning_rate": 9.988113160745519e-06, "loss": 0.3597, "step": 564 }, { "epoch": 0.15, "grad_norm": 3.8019680400237608, "learning_rate": 9.988011387447781e-06, "loss": 0.323, "step": 565 }, { "epoch": 0.15, "grad_norm": 3.8149719698103, "learning_rate": 9.987909180844491e-06, "loss": 0.3083, "step": 566 }, { "epoch": 0.15, "grad_norm": 4.079548503486936, "learning_rate": 9.987806540944528e-06, "loss": 0.3289, "step": 567 }, { "epoch": 0.16, "grad_norm": 3.90149328690206, "learning_rate": 9.987703467756807e-06, "loss": 0.329, "step": 568 }, { "epoch": 0.16, "grad_norm": 3.99169371798887, "learning_rate": 9.987599961290283e-06, "loss": 0.3305, "step": 569 }, { "epoch": 0.16, "grad_norm": 3.2957002191485714, "learning_rate": 9.987496021553946e-06, "loss": 0.2987, "step": 570 }, { "epoch": 0.16, "grad_norm": 3.3513096485139275, "learning_rate": 9.987391648556828e-06, "loss": 0.3049, "step": 571 }, { "epoch": 0.16, "grad_norm": 3.631545057379283, "learning_rate": 9.987286842307991e-06, "loss": 0.2969, "step": 572 }, { "epoch": 0.16, "grad_norm": 3.3346306865300264, "learning_rate": 9.987181602816545e-06, "loss": 0.3115, "step": 573 }, { "epoch": 0.16, "grad_norm": 4.168666835132431, "learning_rate": 9.987075930091629e-06, "loss": 0.3722, "step": 574 }, { "epoch": 0.16, "grad_norm": 3.24943490302494, "learning_rate": 9.986969824142424e-06, "loss": 0.2941, "step": 575 }, { "epoch": 0.16, "grad_norm": 3.5940233382677063, "learning_rate": 9.986863284978143e-06, "loss": 0.3144, "step": 576 }, { "epoch": 0.16, "grad_norm": 3.390409293378752, "learning_rate": 9.986756312608048e-06, "loss": 0.3172, "step": 577 }, { "epoch": 0.16, "grad_norm": 3.5327688539081157, "learning_rate": 9.986648907041428e-06, "loss": 0.2818, "step": 578 }, { "epoch": 0.16, "grad_norm": 3.6394050896129273, "learning_rate": 9.986541068287612e-06, "loss": 0.304, "step": 579 }, { "epoch": 0.16, "grad_norm": 3.940551438373569, "learning_rate": 9.98643279635597e-06, "loss": 0.3581, "step": 580 }, { "epoch": 0.16, "grad_norm": 3.69009213992704, "learning_rate": 9.986324091255908e-06, "loss": 0.2925, "step": 581 }, { "epoch": 0.16, "grad_norm": 3.95483477723251, "learning_rate": 9.986214952996867e-06, "loss": 0.3411, "step": 582 }, { "epoch": 0.16, "grad_norm": 3.514909402110056, "learning_rate": 9.986105381588329e-06, "loss": 0.2952, "step": 583 }, { "epoch": 0.16, "grad_norm": 3.3243367049974073, "learning_rate": 9.985995377039812e-06, "loss": 0.2793, "step": 584 }, { "epoch": 0.16, "grad_norm": 3.5604191206416402, "learning_rate": 9.985884939360873e-06, "loss": 0.3374, "step": 585 }, { "epoch": 0.16, "grad_norm": 3.6168118282133177, "learning_rate": 9.985774068561102e-06, "loss": 0.3224, "step": 586 }, { "epoch": 0.16, "grad_norm": 3.4265991333209294, "learning_rate": 9.985662764650138e-06, "loss": 0.2998, "step": 587 }, { "epoch": 0.16, "grad_norm": 3.4462723878531967, "learning_rate": 9.98555102763764e-06, "loss": 0.2972, "step": 588 }, { "epoch": 0.16, "grad_norm": 3.987835596012852, "learning_rate": 9.985438857533323e-06, "loss": 0.3782, "step": 589 }, { "epoch": 0.16, "grad_norm": 3.5532748451596614, "learning_rate": 9.985326254346928e-06, "loss": 0.287, "step": 590 }, { "epoch": 0.16, "grad_norm": 3.381271295289075, "learning_rate": 9.985213218088234e-06, "loss": 0.2783, "step": 591 }, { "epoch": 0.16, "grad_norm": 3.5978459141959895, "learning_rate": 9.985099748767065e-06, "loss": 0.3263, "step": 592 }, { "epoch": 0.16, "grad_norm": 3.352022064648451, "learning_rate": 9.984985846393276e-06, "loss": 0.3487, "step": 593 }, { "epoch": 0.16, "grad_norm": 3.4412290772492145, "learning_rate": 9.98487151097676e-06, "loss": 0.3153, "step": 594 }, { "epoch": 0.16, "grad_norm": 3.7175627996193428, "learning_rate": 9.984756742527451e-06, "loss": 0.3069, "step": 595 }, { "epoch": 0.16, "grad_norm": 3.241936216144905, "learning_rate": 9.98464154105532e-06, "loss": 0.2914, "step": 596 }, { "epoch": 0.16, "grad_norm": 3.3858799911347424, "learning_rate": 9.984525906570372e-06, "loss": 0.3229, "step": 597 }, { "epoch": 0.16, "grad_norm": 4.212120349376869, "learning_rate": 9.984409839082654e-06, "loss": 0.3308, "step": 598 }, { "epoch": 0.16, "grad_norm": 3.702787114368071, "learning_rate": 9.984293338602249e-06, "loss": 0.3236, "step": 599 }, { "epoch": 0.16, "grad_norm": 3.5629364380620303, "learning_rate": 9.984176405139275e-06, "loss": 0.3187, "step": 600 }, { "epoch": 0.16, "grad_norm": 3.51753497626045, "learning_rate": 9.98405903870389e-06, "loss": 0.3297, "step": 601 }, { "epoch": 0.16, "grad_norm": 3.536184171564669, "learning_rate": 9.983941239306291e-06, "loss": 0.2962, "step": 602 }, { "epoch": 0.16, "grad_norm": 3.4298149652464835, "learning_rate": 9.983823006956714e-06, "loss": 0.3068, "step": 603 }, { "epoch": 0.16, "grad_norm": 3.5493663971781544, "learning_rate": 9.983704341665425e-06, "loss": 0.3154, "step": 604 }, { "epoch": 0.17, "grad_norm": 3.6615116590414227, "learning_rate": 9.983585243442733e-06, "loss": 0.3078, "step": 605 }, { "epoch": 0.17, "grad_norm": 4.027194215842722, "learning_rate": 9.983465712298985e-06, "loss": 0.357, "step": 606 }, { "epoch": 0.17, "grad_norm": 3.7860114628325077, "learning_rate": 9.983345748244566e-06, "loss": 0.3774, "step": 607 }, { "epoch": 0.17, "grad_norm": 3.631378294109237, "learning_rate": 9.983225351289896e-06, "loss": 0.3072, "step": 608 }, { "epoch": 0.17, "grad_norm": 3.6392994212400533, "learning_rate": 9.983104521445434e-06, "loss": 0.3156, "step": 609 }, { "epoch": 0.17, "grad_norm": 3.7247714179757434, "learning_rate": 9.982983258721675e-06, "loss": 0.3245, "step": 610 }, { "epoch": 0.17, "grad_norm": 3.4410774696668964, "learning_rate": 9.982861563129154e-06, "loss": 0.3114, "step": 611 }, { "epoch": 0.17, "grad_norm": 3.5363650805327866, "learning_rate": 9.982739434678444e-06, "loss": 0.284, "step": 612 }, { "epoch": 0.17, "grad_norm": 3.5516954195413963, "learning_rate": 9.982616873380151e-06, "loss": 0.3116, "step": 613 }, { "epoch": 0.17, "grad_norm": 3.3188741657324146, "learning_rate": 9.982493879244925e-06, "loss": 0.2762, "step": 614 }, { "epoch": 0.17, "grad_norm": 3.5845694867073155, "learning_rate": 9.982370452283451e-06, "loss": 0.341, "step": 615 }, { "epoch": 0.17, "grad_norm": 3.8009145877378963, "learning_rate": 9.982246592506446e-06, "loss": 0.3373, "step": 616 }, { "epoch": 0.17, "grad_norm": 3.691928228524045, "learning_rate": 9.982122299924676e-06, "loss": 0.2776, "step": 617 }, { "epoch": 0.17, "grad_norm": 3.3401087126991134, "learning_rate": 9.981997574548933e-06, "loss": 0.3045, "step": 618 }, { "epoch": 0.17, "grad_norm": 3.3507709179044514, "learning_rate": 9.981872416390055e-06, "loss": 0.2873, "step": 619 }, { "epoch": 0.17, "grad_norm": 3.4071140932586723, "learning_rate": 9.981746825458914e-06, "loss": 0.3195, "step": 620 }, { "epoch": 0.17, "grad_norm": 3.4645877328068493, "learning_rate": 9.981620801766418e-06, "loss": 0.2886, "step": 621 }, { "epoch": 0.17, "grad_norm": 3.1229118687038695, "learning_rate": 9.981494345323516e-06, "loss": 0.2735, "step": 622 }, { "epoch": 0.17, "grad_norm": 5.202525560544861, "learning_rate": 9.981367456141193e-06, "loss": 0.3136, "step": 623 }, { "epoch": 0.17, "grad_norm": 3.4894889312577115, "learning_rate": 9.981240134230473e-06, "loss": 0.3027, "step": 624 }, { "epoch": 0.17, "grad_norm": 3.438132474262501, "learning_rate": 9.981112379602414e-06, "loss": 0.2794, "step": 625 }, { "epoch": 0.17, "grad_norm": 3.7382894579174724, "learning_rate": 9.980984192268116e-06, "loss": 0.3309, "step": 626 }, { "epoch": 0.17, "grad_norm": 3.3036678910272803, "learning_rate": 9.980855572238715e-06, "loss": 0.3006, "step": 627 }, { "epoch": 0.17, "grad_norm": 3.940467815872872, "learning_rate": 9.980726519525382e-06, "loss": 0.3199, "step": 628 }, { "epoch": 0.17, "grad_norm": 4.274658661689854, "learning_rate": 9.980597034139328e-06, "loss": 0.2977, "step": 629 }, { "epoch": 0.17, "grad_norm": 3.4313120747684582, "learning_rate": 9.980467116091803e-06, "loss": 0.2858, "step": 630 }, { "epoch": 0.17, "grad_norm": 3.6889372362101156, "learning_rate": 9.980336765394092e-06, "loss": 0.3416, "step": 631 }, { "epoch": 0.17, "grad_norm": 3.250325525490539, "learning_rate": 9.980205982057517e-06, "loss": 0.2786, "step": 632 }, { "epoch": 0.17, "grad_norm": 3.4879333564924555, "learning_rate": 9.980074766093442e-06, "loss": 0.3053, "step": 633 }, { "epoch": 0.17, "grad_norm": 3.777489549643108, "learning_rate": 9.979943117513265e-06, "loss": 0.3085, "step": 634 }, { "epoch": 0.17, "grad_norm": 3.2209229366135164, "learning_rate": 9.979811036328419e-06, "loss": 0.287, "step": 635 }, { "epoch": 0.17, "grad_norm": 3.2635223841120466, "learning_rate": 9.979678522550382e-06, "loss": 0.3024, "step": 636 }, { "epoch": 0.17, "grad_norm": 3.364567623220258, "learning_rate": 9.979545576190662e-06, "loss": 0.3159, "step": 637 }, { "epoch": 0.17, "grad_norm": 4.345702800279604, "learning_rate": 9.979412197260811e-06, "loss": 0.3457, "step": 638 }, { "epoch": 0.17, "grad_norm": 3.2566655299204696, "learning_rate": 9.979278385772414e-06, "loss": 0.2547, "step": 639 }, { "epoch": 0.17, "grad_norm": 3.4134939093571446, "learning_rate": 9.979144141737094e-06, "loss": 0.2887, "step": 640 }, { "epoch": 0.17, "grad_norm": 3.7000426602020213, "learning_rate": 9.979009465166515e-06, "loss": 0.3404, "step": 641 }, { "epoch": 0.18, "grad_norm": 3.580331355465202, "learning_rate": 9.978874356072376e-06, "loss": 0.2834, "step": 642 }, { "epoch": 0.18, "grad_norm": 3.5867870360792637, "learning_rate": 9.978738814466411e-06, "loss": 0.3177, "step": 643 }, { "epoch": 0.18, "grad_norm": 5.316895667360294, "learning_rate": 9.978602840360398e-06, "loss": 0.2988, "step": 644 }, { "epoch": 0.18, "grad_norm": 3.305031879529095, "learning_rate": 9.978466433766148e-06, "loss": 0.2752, "step": 645 }, { "epoch": 0.18, "grad_norm": 3.9269551875751225, "learning_rate": 9.978329594695508e-06, "loss": 0.2957, "step": 646 }, { "epoch": 0.18, "grad_norm": 3.256096704933166, "learning_rate": 9.978192323160368e-06, "loss": 0.2645, "step": 647 }, { "epoch": 0.18, "grad_norm": 3.440585663831507, "learning_rate": 9.978054619172652e-06, "loss": 0.2684, "step": 648 }, { "epoch": 0.18, "grad_norm": 3.6839411970396596, "learning_rate": 9.977916482744323e-06, "loss": 0.2976, "step": 649 }, { "epoch": 0.18, "grad_norm": 3.634028855036008, "learning_rate": 9.977777913887379e-06, "loss": 0.2793, "step": 650 }, { "epoch": 0.18, "grad_norm": 3.5660590265818954, "learning_rate": 9.977638912613858e-06, "loss": 0.3292, "step": 651 }, { "epoch": 0.18, "grad_norm": 3.7364939252998695, "learning_rate": 9.977499478935835e-06, "loss": 0.3336, "step": 652 }, { "epoch": 0.18, "grad_norm": 3.5216471106096296, "learning_rate": 9.977359612865424e-06, "loss": 0.2767, "step": 653 }, { "epoch": 0.18, "grad_norm": 3.484724921616459, "learning_rate": 9.977219314414773e-06, "loss": 0.3201, "step": 654 }, { "epoch": 0.18, "grad_norm": 4.0180796758846995, "learning_rate": 9.977078583596071e-06, "loss": 0.3285, "step": 655 }, { "epoch": 0.18, "grad_norm": 3.4677323811189806, "learning_rate": 9.976937420421543e-06, "loss": 0.2674, "step": 656 }, { "epoch": 0.18, "grad_norm": 3.3661415931659713, "learning_rate": 9.976795824903451e-06, "loss": 0.308, "step": 657 }, { "epoch": 0.18, "grad_norm": 3.353678458271599, "learning_rate": 9.976653797054097e-06, "loss": 0.2963, "step": 658 }, { "epoch": 0.18, "grad_norm": 3.982285147070446, "learning_rate": 9.976511336885815e-06, "loss": 0.3455, "step": 659 }, { "epoch": 0.18, "grad_norm": 3.995264844114513, "learning_rate": 9.976368444410985e-06, "loss": 0.2912, "step": 660 }, { "epoch": 0.18, "grad_norm": 3.5517290648902455, "learning_rate": 9.976225119642018e-06, "loss": 0.2731, "step": 661 }, { "epoch": 0.18, "grad_norm": 3.467066287909187, "learning_rate": 9.976081362591365e-06, "loss": 0.3061, "step": 662 }, { "epoch": 0.18, "grad_norm": 3.4463278196686407, "learning_rate": 9.975937173271513e-06, "loss": 0.2998, "step": 663 }, { "epoch": 0.18, "grad_norm": 3.7554152146835937, "learning_rate": 9.975792551694988e-06, "loss": 0.3322, "step": 664 }, { "epoch": 0.18, "grad_norm": 3.340958472323737, "learning_rate": 9.975647497874354e-06, "loss": 0.2882, "step": 665 }, { "epoch": 0.18, "grad_norm": 3.377911734877334, "learning_rate": 9.975502011822212e-06, "loss": 0.3085, "step": 666 }, { "epoch": 0.18, "grad_norm": 3.4037774066888984, "learning_rate": 9.975356093551198e-06, "loss": 0.2641, "step": 667 }, { "epoch": 0.18, "grad_norm": 3.6539226316218123, "learning_rate": 9.97520974307399e-06, "loss": 0.2875, "step": 668 }, { "epoch": 0.18, "grad_norm": 3.394718200753031, "learning_rate": 9.975062960403303e-06, "loss": 0.2769, "step": 669 }, { "epoch": 0.18, "grad_norm": 3.307075595532365, "learning_rate": 9.974915745551882e-06, "loss": 0.2774, "step": 670 }, { "epoch": 0.18, "grad_norm": 3.4330449767071975, "learning_rate": 9.974768098532521e-06, "loss": 0.2878, "step": 671 }, { "epoch": 0.18, "grad_norm": 3.528474114953311, "learning_rate": 9.974620019358046e-06, "loss": 0.298, "step": 672 }, { "epoch": 0.18, "grad_norm": 3.1364271194911257, "learning_rate": 9.974471508041317e-06, "loss": 0.2823, "step": 673 }, { "epoch": 0.18, "grad_norm": 3.6125744890901776, "learning_rate": 9.974322564595236e-06, "loss": 0.3083, "step": 674 }, { "epoch": 0.18, "grad_norm": 3.7447606021937654, "learning_rate": 9.974173189032744e-06, "loss": 0.2994, "step": 675 }, { "epoch": 0.18, "grad_norm": 3.6914472124919238, "learning_rate": 9.974023381366814e-06, "loss": 0.3038, "step": 676 }, { "epoch": 0.18, "grad_norm": 3.6122526597042492, "learning_rate": 9.973873141610462e-06, "loss": 0.3342, "step": 677 }, { "epoch": 0.19, "grad_norm": 3.793099827448773, "learning_rate": 9.973722469776739e-06, "loss": 0.2968, "step": 678 }, { "epoch": 0.19, "grad_norm": 3.809569548110065, "learning_rate": 9.973571365878732e-06, "loss": 0.3123, "step": 679 }, { "epoch": 0.19, "grad_norm": 3.6698612347943005, "learning_rate": 9.97341982992957e-06, "loss": 0.3125, "step": 680 }, { "epoch": 0.19, "grad_norm": 3.976927671573408, "learning_rate": 9.973267861942416e-06, "loss": 0.2685, "step": 681 }, { "epoch": 0.19, "grad_norm": 3.315185801763032, "learning_rate": 9.973115461930469e-06, "loss": 0.2547, "step": 682 }, { "epoch": 0.19, "grad_norm": 3.7387169895798125, "learning_rate": 9.97296262990697e-06, "loss": 0.3174, "step": 683 }, { "epoch": 0.19, "grad_norm": 3.583815067524512, "learning_rate": 9.972809365885197e-06, "loss": 0.3034, "step": 684 }, { "epoch": 0.19, "grad_norm": 3.389417405378248, "learning_rate": 9.972655669878462e-06, "loss": 0.3176, "step": 685 }, { "epoch": 0.19, "grad_norm": 3.770990982203721, "learning_rate": 9.972501541900115e-06, "loss": 0.302, "step": 686 }, { "epoch": 0.19, "grad_norm": 3.162530491530101, "learning_rate": 9.972346981963546e-06, "loss": 0.2757, "step": 687 }, { "epoch": 0.19, "grad_norm": 3.7118838342387286, "learning_rate": 9.972191990082183e-06, "loss": 0.2863, "step": 688 }, { "epoch": 0.19, "grad_norm": 3.380304387182788, "learning_rate": 9.97203656626949e-06, "loss": 0.2749, "step": 689 }, { "epoch": 0.19, "grad_norm": 3.589306708221887, "learning_rate": 9.971880710538967e-06, "loss": 0.2851, "step": 690 }, { "epoch": 0.19, "grad_norm": 3.310786736478933, "learning_rate": 9.971724422904154e-06, "loss": 0.2863, "step": 691 }, { "epoch": 0.19, "grad_norm": 3.3741422903867284, "learning_rate": 9.971567703378629e-06, "loss": 0.282, "step": 692 }, { "epoch": 0.19, "grad_norm": 3.6876230819120135, "learning_rate": 9.971410551976001e-06, "loss": 0.3207, "step": 693 }, { "epoch": 0.19, "grad_norm": 3.682734463658358, "learning_rate": 9.971252968709927e-06, "loss": 0.3094, "step": 694 }, { "epoch": 0.19, "grad_norm": 3.377390763626571, "learning_rate": 9.971094953594095e-06, "loss": 0.2732, "step": 695 }, { "epoch": 0.19, "grad_norm": 3.4195438055886367, "learning_rate": 9.970936506642232e-06, "loss": 0.2991, "step": 696 }, { "epoch": 0.19, "grad_norm": 3.092369251801411, "learning_rate": 9.9707776278681e-06, "loss": 0.2981, "step": 697 }, { "epoch": 0.19, "grad_norm": 3.724113439290786, "learning_rate": 9.970618317285501e-06, "loss": 0.3086, "step": 698 }, { "epoch": 0.19, "grad_norm": 3.194815294806323, "learning_rate": 9.970458574908277e-06, "loss": 0.2704, "step": 699 }, { "epoch": 0.19, "grad_norm": 3.362049297021861, "learning_rate": 9.970298400750303e-06, "loss": 0.3081, "step": 700 }, { "epoch": 0.19, "grad_norm": 3.546960570040055, "learning_rate": 9.970137794825491e-06, "loss": 0.3052, "step": 701 }, { "epoch": 0.19, "grad_norm": 3.5294080868743736, "learning_rate": 9.969976757147795e-06, "loss": 0.3013, "step": 702 }, { "epoch": 0.19, "grad_norm": 3.639605539281974, "learning_rate": 9.969815287731205e-06, "loss": 0.2826, "step": 703 }, { "epoch": 0.19, "grad_norm": 3.4951388093222646, "learning_rate": 9.969653386589749e-06, "loss": 0.3022, "step": 704 }, { "epoch": 0.19, "grad_norm": 3.684031151838524, "learning_rate": 9.969491053737487e-06, "loss": 0.2935, "step": 705 }, { "epoch": 0.19, "grad_norm": 3.5053122144135536, "learning_rate": 9.969328289188522e-06, "loss": 0.3145, "step": 706 }, { "epoch": 0.19, "grad_norm": 3.6128578564831915, "learning_rate": 9.969165092956996e-06, "loss": 0.2857, "step": 707 }, { "epoch": 0.19, "grad_norm": 3.4479859372531347, "learning_rate": 9.969001465057084e-06, "loss": 0.2982, "step": 708 }, { "epoch": 0.19, "grad_norm": 3.456346321232317, "learning_rate": 9.968837405502998e-06, "loss": 0.3118, "step": 709 }, { "epoch": 0.19, "grad_norm": 3.6896387714171346, "learning_rate": 9.968672914308995e-06, "loss": 0.3434, "step": 710 }, { "epoch": 0.19, "grad_norm": 3.7527125135135795, "learning_rate": 9.968507991489358e-06, "loss": 0.2592, "step": 711 }, { "epoch": 0.19, "grad_norm": 3.7451778291708675, "learning_rate": 9.968342637058418e-06, "loss": 0.3107, "step": 712 }, { "epoch": 0.19, "grad_norm": 3.3614085447903888, "learning_rate": 9.96817685103054e-06, "loss": 0.3142, "step": 713 }, { "epoch": 0.19, "grad_norm": 3.2611284861341434, "learning_rate": 9.968010633420122e-06, "loss": 0.2655, "step": 714 }, { "epoch": 0.2, "grad_norm": 3.5702815951786158, "learning_rate": 9.967843984241606e-06, "loss": 0.3223, "step": 715 }, { "epoch": 0.2, "grad_norm": 3.2223114339106003, "learning_rate": 9.967676903509467e-06, "loss": 0.2823, "step": 716 }, { "epoch": 0.2, "grad_norm": 3.355306906446752, "learning_rate": 9.967509391238218e-06, "loss": 0.3132, "step": 717 }, { "epoch": 0.2, "grad_norm": 3.44546429194885, "learning_rate": 9.967341447442418e-06, "loss": 0.2485, "step": 718 }, { "epoch": 0.2, "grad_norm": 3.1115232855635186, "learning_rate": 9.967173072136647e-06, "loss": 0.3042, "step": 719 }, { "epoch": 0.2, "grad_norm": 4.203475365793821, "learning_rate": 9.967004265335536e-06, "loss": 0.3106, "step": 720 }, { "epoch": 0.2, "grad_norm": 3.5798124241562346, "learning_rate": 9.96683502705375e-06, "loss": 0.2534, "step": 721 }, { "epoch": 0.2, "grad_norm": 3.2773676888747385, "learning_rate": 9.966665357305988e-06, "loss": 0.2713, "step": 722 }, { "epoch": 0.2, "grad_norm": 3.723228547540024, "learning_rate": 9.966495256106991e-06, "loss": 0.2826, "step": 723 }, { "epoch": 0.2, "grad_norm": 3.4860363526907254, "learning_rate": 9.966324723471535e-06, "loss": 0.3045, "step": 724 }, { "epoch": 0.2, "grad_norm": 3.3812284091208986, "learning_rate": 9.966153759414434e-06, "loss": 0.2848, "step": 725 }, { "epoch": 0.2, "grad_norm": 3.191936349781771, "learning_rate": 9.96598236395054e-06, "loss": 0.3016, "step": 726 }, { "epoch": 0.2, "grad_norm": 3.8881091244639414, "learning_rate": 9.965810537094741e-06, "loss": 0.3334, "step": 727 }, { "epoch": 0.2, "grad_norm": 3.4495767848787366, "learning_rate": 9.965638278861966e-06, "loss": 0.3044, "step": 728 }, { "epoch": 0.2, "grad_norm": 3.7543822685702954, "learning_rate": 9.965465589267176e-06, "loss": 0.3069, "step": 729 }, { "epoch": 0.2, "grad_norm": 3.4668386863288805, "learning_rate": 9.965292468325373e-06, "loss": 0.2744, "step": 730 }, { "epoch": 0.2, "grad_norm": 3.1985198222144184, "learning_rate": 9.965118916051597e-06, "loss": 0.2557, "step": 731 }, { "epoch": 0.2, "grad_norm": 3.995622422707886, "learning_rate": 9.964944932460923e-06, "loss": 0.3204, "step": 732 }, { "epoch": 0.2, "grad_norm": 3.2285352897398423, "learning_rate": 9.964770517568467e-06, "loss": 0.2864, "step": 733 }, { "epoch": 0.2, "grad_norm": 3.503607216309929, "learning_rate": 9.964595671389379e-06, "loss": 0.2887, "step": 734 }, { "epoch": 0.2, "grad_norm": 3.302761482883814, "learning_rate": 9.964420393938848e-06, "loss": 0.2739, "step": 735 }, { "epoch": 0.2, "grad_norm": 3.1116239504167753, "learning_rate": 9.964244685232098e-06, "loss": 0.2307, "step": 736 }, { "epoch": 0.2, "grad_norm": 3.6026380225463503, "learning_rate": 9.964068545284396e-06, "loss": 0.3025, "step": 737 }, { "epoch": 0.2, "grad_norm": 3.829076461368195, "learning_rate": 9.963891974111042e-06, "loss": 0.2869, "step": 738 }, { "epoch": 0.2, "grad_norm": 3.155511060956363, "learning_rate": 9.963714971727374e-06, "loss": 0.2742, "step": 739 }, { "epoch": 0.2, "grad_norm": 3.507236817874012, "learning_rate": 9.96353753814877e-06, "loss": 0.2853, "step": 740 }, { "epoch": 0.2, "grad_norm": 3.6058429077383973, "learning_rate": 9.96335967339064e-06, "loss": 0.2626, "step": 741 }, { "epoch": 0.2, "grad_norm": 3.790342132554691, "learning_rate": 9.96318137746844e-06, "loss": 0.3271, "step": 742 }, { "epoch": 0.2, "grad_norm": 3.565100051079855, "learning_rate": 9.963002650397655e-06, "loss": 0.3191, "step": 743 }, { "epoch": 0.2, "grad_norm": 3.4529144863883072, "learning_rate": 9.96282349219381e-06, "loss": 0.3003, "step": 744 }, { "epoch": 0.2, "grad_norm": 3.372454447920437, "learning_rate": 9.96264390287247e-06, "loss": 0.283, "step": 745 }, { "epoch": 0.2, "grad_norm": 3.3548046061817156, "learning_rate": 9.962463882449238e-06, "loss": 0.2864, "step": 746 }, { "epoch": 0.2, "grad_norm": 3.1307353004840004, "learning_rate": 9.96228343093975e-06, "loss": 0.2326, "step": 747 }, { "epoch": 0.2, "grad_norm": 3.3358542177937953, "learning_rate": 9.96210254835968e-06, "loss": 0.2748, "step": 748 }, { "epoch": 0.2, "grad_norm": 3.2425270065380642, "learning_rate": 9.961921234724743e-06, "loss": 0.2511, "step": 749 }, { "epoch": 0.2, "grad_norm": 3.230679448919394, "learning_rate": 9.96173949005069e-06, "loss": 0.2731, "step": 750 }, { "epoch": 0.21, "grad_norm": 6.8136454472507255, "learning_rate": 9.961557314353309e-06, "loss": 0.2846, "step": 751 }, { "epoch": 0.21, "grad_norm": 3.554129291982215, "learning_rate": 9.961374707648424e-06, "loss": 0.2745, "step": 752 }, { "epoch": 0.21, "grad_norm": 3.6373577331973843, "learning_rate": 9.9611916699519e-06, "loss": 0.2961, "step": 753 }, { "epoch": 0.21, "grad_norm": 3.6789201237509217, "learning_rate": 9.961008201279636e-06, "loss": 0.3321, "step": 754 }, { "epoch": 0.21, "grad_norm": 3.6832397156715637, "learning_rate": 9.960824301647569e-06, "loss": 0.3356, "step": 755 }, { "epoch": 0.21, "grad_norm": 3.432029081351122, "learning_rate": 9.960639971071677e-06, "loss": 0.2773, "step": 756 }, { "epoch": 0.21, "grad_norm": 3.7951568034748164, "learning_rate": 9.960455209567971e-06, "loss": 0.2427, "step": 757 }, { "epoch": 0.21, "grad_norm": 3.141713511374909, "learning_rate": 9.960270017152502e-06, "loss": 0.3027, "step": 758 }, { "epoch": 0.21, "grad_norm": 3.2027679396906596, "learning_rate": 9.960084393841355e-06, "loss": 0.2652, "step": 759 }, { "epoch": 0.21, "grad_norm": 3.4805531515238735, "learning_rate": 9.959898339650658e-06, "loss": 0.2826, "step": 760 }, { "epoch": 0.21, "grad_norm": 3.686151448013631, "learning_rate": 9.959711854596573e-06, "loss": 0.3276, "step": 761 }, { "epoch": 0.21, "grad_norm": 3.2125987881135933, "learning_rate": 9.959524938695296e-06, "loss": 0.258, "step": 762 }, { "epoch": 0.21, "grad_norm": 3.2359755147444935, "learning_rate": 9.959337591963069e-06, "loss": 0.2894, "step": 763 }, { "epoch": 0.21, "grad_norm": 3.6188907281506673, "learning_rate": 9.959149814416165e-06, "loss": 0.2608, "step": 764 }, { "epoch": 0.21, "grad_norm": 3.6435810728364535, "learning_rate": 9.958961606070896e-06, "loss": 0.2962, "step": 765 }, { "epoch": 0.21, "grad_norm": 3.3582571673054757, "learning_rate": 9.958772966943612e-06, "loss": 0.29, "step": 766 }, { "epoch": 0.21, "grad_norm": 3.34219987746046, "learning_rate": 9.9585838970507e-06, "loss": 0.2694, "step": 767 }, { "epoch": 0.21, "grad_norm": 3.6100025686666433, "learning_rate": 9.958394396408583e-06, "loss": 0.267, "step": 768 }, { "epoch": 0.21, "grad_norm": 3.399622053459657, "learning_rate": 9.958204465033726e-06, "loss": 0.2862, "step": 769 }, { "epoch": 0.21, "grad_norm": 3.0979020336646337, "learning_rate": 9.958014102942623e-06, "loss": 0.262, "step": 770 }, { "epoch": 0.21, "grad_norm": 3.555138762200467, "learning_rate": 9.957823310151816e-06, "loss": 0.2562, "step": 771 }, { "epoch": 0.21, "grad_norm": 3.8621948610660617, "learning_rate": 9.957632086677876e-06, "loss": 0.3156, "step": 772 }, { "epoch": 0.21, "grad_norm": 3.317185612079889, "learning_rate": 9.957440432537415e-06, "loss": 0.3039, "step": 773 }, { "epoch": 0.21, "grad_norm": 3.5648043021430467, "learning_rate": 9.957248347747083e-06, "loss": 0.2993, "step": 774 }, { "epoch": 0.21, "grad_norm": 3.3233896511158, "learning_rate": 9.957055832323566e-06, "loss": 0.2839, "step": 775 }, { "epoch": 0.21, "grad_norm": 3.099205957694149, "learning_rate": 9.956862886283586e-06, "loss": 0.2632, "step": 776 }, { "epoch": 0.21, "grad_norm": 3.4295710193400355, "learning_rate": 9.956669509643904e-06, "loss": 0.2774, "step": 777 }, { "epoch": 0.21, "grad_norm": 3.7061755972324506, "learning_rate": 9.95647570242132e-06, "loss": 0.334, "step": 778 }, { "epoch": 0.21, "grad_norm": 3.416689351648574, "learning_rate": 9.95628146463267e-06, "loss": 0.3067, "step": 779 }, { "epoch": 0.21, "grad_norm": 3.305566131288089, "learning_rate": 9.956086796294828e-06, "loss": 0.2681, "step": 780 }, { "epoch": 0.21, "grad_norm": 3.4932129502735303, "learning_rate": 9.955891697424704e-06, "loss": 0.3035, "step": 781 }, { "epoch": 0.21, "grad_norm": 2.8483997669145533, "learning_rate": 9.955696168039244e-06, "loss": 0.245, "step": 782 }, { "epoch": 0.21, "grad_norm": 3.4241581489682256, "learning_rate": 9.955500208155438e-06, "loss": 0.3039, "step": 783 }, { "epoch": 0.21, "grad_norm": 3.3286605374314213, "learning_rate": 9.955303817790303e-06, "loss": 0.2968, "step": 784 }, { "epoch": 0.21, "grad_norm": 2.9719950010043386, "learning_rate": 9.955106996960903e-06, "loss": 0.286, "step": 785 }, { "epoch": 0.21, "grad_norm": 3.0243052146947793, "learning_rate": 9.954909745684339e-06, "loss": 0.2745, "step": 786 }, { "epoch": 0.21, "grad_norm": 3.097466339402794, "learning_rate": 9.954712063977738e-06, "loss": 0.3112, "step": 787 }, { "epoch": 0.22, "grad_norm": 3.3248046861463463, "learning_rate": 9.954513951858279e-06, "loss": 0.2847, "step": 788 }, { "epoch": 0.22, "grad_norm": 3.204870899669546, "learning_rate": 9.95431540934317e-06, "loss": 0.2618, "step": 789 }, { "epoch": 0.22, "grad_norm": 3.3254016311962613, "learning_rate": 9.954116436449656e-06, "loss": 0.2929, "step": 790 }, { "epoch": 0.22, "grad_norm": 3.246936263303662, "learning_rate": 9.953917033195026e-06, "loss": 0.2656, "step": 791 }, { "epoch": 0.22, "grad_norm": 3.1496320423146447, "learning_rate": 9.953717199596598e-06, "loss": 0.2153, "step": 792 }, { "epoch": 0.22, "grad_norm": 3.175948122557839, "learning_rate": 9.953516935671734e-06, "loss": 0.288, "step": 793 }, { "epoch": 0.22, "grad_norm": 3.36687264722415, "learning_rate": 9.95331624143783e-06, "loss": 0.2937, "step": 794 }, { "epoch": 0.22, "grad_norm": 3.423492599897248, "learning_rate": 9.95311511691232e-06, "loss": 0.2754, "step": 795 }, { "epoch": 0.22, "grad_norm": 3.439266131874316, "learning_rate": 9.952913562112675e-06, "loss": 0.2964, "step": 796 }, { "epoch": 0.22, "grad_norm": 3.952737750541927, "learning_rate": 9.952711577056403e-06, "loss": 0.2723, "step": 797 }, { "epoch": 0.22, "grad_norm": 3.7179796705645707, "learning_rate": 9.952509161761056e-06, "loss": 0.3098, "step": 798 }, { "epoch": 0.22, "grad_norm": 3.404768195466816, "learning_rate": 9.95230631624421e-06, "loss": 0.2729, "step": 799 }, { "epoch": 0.22, "grad_norm": 3.3747019633380724, "learning_rate": 9.952103040523493e-06, "loss": 0.2692, "step": 800 }, { "epoch": 0.22, "grad_norm": 3.1322021548770578, "learning_rate": 9.951899334616559e-06, "loss": 0.2526, "step": 801 }, { "epoch": 0.22, "grad_norm": 3.5184023033352556, "learning_rate": 9.951695198541105e-06, "loss": 0.3073, "step": 802 }, { "epoch": 0.22, "grad_norm": 2.8511693229634583, "learning_rate": 9.951490632314863e-06, "loss": 0.2472, "step": 803 }, { "epoch": 0.22, "grad_norm": 3.0923307213726834, "learning_rate": 9.951285635955606e-06, "loss": 0.2473, "step": 804 }, { "epoch": 0.22, "grad_norm": 3.510034222113154, "learning_rate": 9.951080209481138e-06, "loss": 0.2886, "step": 805 }, { "epoch": 0.22, "grad_norm": 3.7110009649027296, "learning_rate": 9.95087435290931e-06, "loss": 0.341, "step": 806 }, { "epoch": 0.22, "grad_norm": 3.1540785530107054, "learning_rate": 9.950668066258e-06, "loss": 0.2444, "step": 807 }, { "epoch": 0.22, "grad_norm": 3.0355556075817396, "learning_rate": 9.950461349545131e-06, "loss": 0.2393, "step": 808 }, { "epoch": 0.22, "grad_norm": 3.1495907271209647, "learning_rate": 9.950254202788655e-06, "loss": 0.2486, "step": 809 }, { "epoch": 0.22, "grad_norm": 3.2902495330650754, "learning_rate": 9.950046626006575e-06, "loss": 0.2636, "step": 810 }, { "epoch": 0.22, "grad_norm": 3.3813702500369938, "learning_rate": 9.949838619216917e-06, "loss": 0.3369, "step": 811 }, { "epoch": 0.22, "grad_norm": 3.5617853787228464, "learning_rate": 9.949630182437753e-06, "loss": 0.2974, "step": 812 }, { "epoch": 0.22, "grad_norm": 3.386285071892533, "learning_rate": 9.949421315687186e-06, "loss": 0.2609, "step": 813 }, { "epoch": 0.22, "grad_norm": 3.6558861125591653, "learning_rate": 9.949212018983366e-06, "loss": 0.3085, "step": 814 }, { "epoch": 0.22, "grad_norm": 4.1046420451903805, "learning_rate": 9.94900229234447e-06, "loss": 0.2951, "step": 815 }, { "epoch": 0.22, "grad_norm": 3.667593788754319, "learning_rate": 9.94879213578872e-06, "loss": 0.3009, "step": 816 }, { "epoch": 0.22, "grad_norm": 3.104437788153663, "learning_rate": 9.948581549334368e-06, "loss": 0.2511, "step": 817 }, { "epoch": 0.22, "grad_norm": 3.8860325625070287, "learning_rate": 9.948370532999711e-06, "loss": 0.3225, "step": 818 }, { "epoch": 0.22, "grad_norm": 3.0756469664723327, "learning_rate": 9.948159086803078e-06, "loss": 0.2671, "step": 819 }, { "epoch": 0.22, "grad_norm": 3.690499751984658, "learning_rate": 9.94794721076284e-06, "loss": 0.315, "step": 820 }, { "epoch": 0.22, "grad_norm": 3.078693142640425, "learning_rate": 9.9477349048974e-06, "loss": 0.2147, "step": 821 }, { "epoch": 0.22, "grad_norm": 3.0451427390150854, "learning_rate": 9.9475221692252e-06, "loss": 0.2416, "step": 822 }, { "epoch": 0.22, "grad_norm": 3.4963280008252466, "learning_rate": 9.947309003764723e-06, "loss": 0.2981, "step": 823 }, { "epoch": 0.22, "grad_norm": 3.2465290072893165, "learning_rate": 9.947095408534483e-06, "loss": 0.2732, "step": 824 }, { "epoch": 0.23, "grad_norm": 3.218986694396093, "learning_rate": 9.94688138355304e-06, "loss": 0.2907, "step": 825 }, { "epoch": 0.23, "grad_norm": 3.5567896355598005, "learning_rate": 9.946666928838982e-06, "loss": 0.2915, "step": 826 }, { "epoch": 0.23, "grad_norm": 3.565878929589163, "learning_rate": 9.946452044410941e-06, "loss": 0.2851, "step": 827 }, { "epoch": 0.23, "grad_norm": 3.5318361932044806, "learning_rate": 9.946236730287582e-06, "loss": 0.2832, "step": 828 }, { "epoch": 0.23, "grad_norm": 3.2918080992345478, "learning_rate": 9.94602098648761e-06, "loss": 0.2585, "step": 829 }, { "epoch": 0.23, "grad_norm": 3.3094119631714847, "learning_rate": 9.945804813029767e-06, "loss": 0.3079, "step": 830 }, { "epoch": 0.23, "grad_norm": 2.9561930344791723, "learning_rate": 9.94558820993283e-06, "loss": 0.2417, "step": 831 }, { "epoch": 0.23, "grad_norm": 3.169239609485324, "learning_rate": 9.945371177215619e-06, "loss": 0.2555, "step": 832 }, { "epoch": 0.23, "grad_norm": 3.7261798459193667, "learning_rate": 9.945153714896982e-06, "loss": 0.3168, "step": 833 }, { "epoch": 0.23, "grad_norm": 2.8886358861284704, "learning_rate": 9.944935822995815e-06, "loss": 0.2451, "step": 834 }, { "epoch": 0.23, "grad_norm": 3.299749967695516, "learning_rate": 9.944717501531045e-06, "loss": 0.2478, "step": 835 }, { "epoch": 0.23, "grad_norm": 3.3127143993757824, "learning_rate": 9.944498750521634e-06, "loss": 0.2643, "step": 836 }, { "epoch": 0.23, "grad_norm": 3.2585992568654554, "learning_rate": 9.944279569986588e-06, "loss": 0.2957, "step": 837 }, { "epoch": 0.23, "grad_norm": 3.4082780034209983, "learning_rate": 9.944059959944948e-06, "loss": 0.3225, "step": 838 }, { "epoch": 0.23, "grad_norm": 3.012256541341559, "learning_rate": 9.943839920415787e-06, "loss": 0.253, "step": 839 }, { "epoch": 0.23, "grad_norm": 3.267056622102402, "learning_rate": 9.943619451418225e-06, "loss": 0.2514, "step": 840 }, { "epoch": 0.23, "grad_norm": 3.1673656637844374, "learning_rate": 9.943398552971409e-06, "loss": 0.3029, "step": 841 }, { "epoch": 0.23, "grad_norm": 3.8834986105335467, "learning_rate": 9.943177225094532e-06, "loss": 0.2973, "step": 842 }, { "epoch": 0.23, "grad_norm": 3.117308380535242, "learning_rate": 9.94295546780682e-06, "loss": 0.2223, "step": 843 }, { "epoch": 0.23, "grad_norm": 3.2606961416618687, "learning_rate": 9.942733281127536e-06, "loss": 0.2638, "step": 844 }, { "epoch": 0.23, "grad_norm": 3.4167423699936785, "learning_rate": 9.94251066507598e-06, "loss": 0.2807, "step": 845 }, { "epoch": 0.23, "grad_norm": 3.3056984549563353, "learning_rate": 9.942287619671494e-06, "loss": 0.3129, "step": 846 }, { "epoch": 0.23, "grad_norm": 3.1739941932147806, "learning_rate": 9.94206414493345e-06, "loss": 0.2726, "step": 847 }, { "epoch": 0.23, "grad_norm": 3.4368729160803593, "learning_rate": 9.941840240881265e-06, "loss": 0.2781, "step": 848 }, { "epoch": 0.23, "grad_norm": 3.5578848531342517, "learning_rate": 9.941615907534387e-06, "loss": 0.313, "step": 849 }, { "epoch": 0.23, "grad_norm": 2.9482153702050877, "learning_rate": 9.941391144912304e-06, "loss": 0.2456, "step": 850 }, { "epoch": 0.23, "grad_norm": 3.0827951822313717, "learning_rate": 9.94116595303454e-06, "loss": 0.2722, "step": 851 }, { "epoch": 0.23, "grad_norm": 3.129244654862273, "learning_rate": 9.94094033192066e-06, "loss": 0.2399, "step": 852 }, { "epoch": 0.23, "grad_norm": 3.4025518228901825, "learning_rate": 9.94071428159026e-06, "loss": 0.2997, "step": 853 }, { "epoch": 0.23, "grad_norm": 3.5116719524469544, "learning_rate": 9.940487802062979e-06, "loss": 0.3363, "step": 854 }, { "epoch": 0.23, "grad_norm": 3.1240186869932733, "learning_rate": 9.940260893358493e-06, "loss": 0.2955, "step": 855 }, { "epoch": 0.23, "grad_norm": 3.453953164988273, "learning_rate": 9.940033555496509e-06, "loss": 0.306, "step": 856 }, { "epoch": 0.23, "grad_norm": 3.461703476402533, "learning_rate": 9.939805788496778e-06, "loss": 0.2835, "step": 857 }, { "epoch": 0.23, "grad_norm": 3.1825409882710254, "learning_rate": 9.939577592379088e-06, "loss": 0.2549, "step": 858 }, { "epoch": 0.23, "grad_norm": 3.338118785789188, "learning_rate": 9.93934896716326e-06, "loss": 0.2726, "step": 859 }, { "epoch": 0.23, "grad_norm": 3.3044553116495945, "learning_rate": 9.939119912869155e-06, "loss": 0.2638, "step": 860 }, { "epoch": 0.24, "grad_norm": 3.0747782437008375, "learning_rate": 9.93889042951667e-06, "loss": 0.2713, "step": 861 }, { "epoch": 0.24, "grad_norm": 2.9895779820807205, "learning_rate": 9.93866051712574e-06, "loss": 0.2636, "step": 862 }, { "epoch": 0.24, "grad_norm": 3.363429208502432, "learning_rate": 9.93843017571634e-06, "loss": 0.2786, "step": 863 }, { "epoch": 0.24, "grad_norm": 3.391370227576883, "learning_rate": 9.938199405308475e-06, "loss": 0.2772, "step": 864 }, { "epoch": 0.24, "grad_norm": 3.216721416316368, "learning_rate": 9.937968205922198e-06, "loss": 0.2758, "step": 865 }, { "epoch": 0.24, "grad_norm": 3.033388138988561, "learning_rate": 9.937736577577587e-06, "loss": 0.2684, "step": 866 }, { "epoch": 0.24, "grad_norm": 3.7052363377283357, "learning_rate": 9.937504520294767e-06, "loss": 0.3103, "step": 867 }, { "epoch": 0.24, "grad_norm": 3.30732787785901, "learning_rate": 9.937272034093897e-06, "loss": 0.2968, "step": 868 }, { "epoch": 0.24, "grad_norm": 3.4051708530518447, "learning_rate": 9.93703911899517e-06, "loss": 0.2636, "step": 869 }, { "epoch": 0.24, "grad_norm": 3.276172911418095, "learning_rate": 9.93680577501882e-06, "loss": 0.2866, "step": 870 }, { "epoch": 0.24, "grad_norm": 3.6143399487223853, "learning_rate": 9.93657200218512e-06, "loss": 0.2885, "step": 871 }, { "epoch": 0.24, "grad_norm": 3.4778218379995374, "learning_rate": 9.936337800514377e-06, "loss": 0.2913, "step": 872 }, { "epoch": 0.24, "grad_norm": 3.340457203848266, "learning_rate": 9.936103170026934e-06, "loss": 0.2513, "step": 873 }, { "epoch": 0.24, "grad_norm": 3.559938192438544, "learning_rate": 9.935868110743175e-06, "loss": 0.3287, "step": 874 }, { "epoch": 0.24, "grad_norm": 3.3847174795028616, "learning_rate": 9.935632622683518e-06, "loss": 0.2891, "step": 875 }, { "epoch": 0.24, "grad_norm": 3.056051455197346, "learning_rate": 9.93539670586842e-06, "loss": 0.228, "step": 876 }, { "epoch": 0.24, "grad_norm": 3.5349518806084133, "learning_rate": 9.935160360318376e-06, "loss": 0.2646, "step": 877 }, { "epoch": 0.24, "grad_norm": 3.235116153391338, "learning_rate": 9.934923586053917e-06, "loss": 0.2633, "step": 878 }, { "epoch": 0.24, "grad_norm": 3.4477474718655348, "learning_rate": 9.93468638309561e-06, "loss": 0.2708, "step": 879 }, { "epoch": 0.24, "grad_norm": 3.4013210275500563, "learning_rate": 9.934448751464064e-06, "loss": 0.2771, "step": 880 }, { "epoch": 0.24, "grad_norm": 3.088117903879566, "learning_rate": 9.934210691179918e-06, "loss": 0.2546, "step": 881 }, { "epoch": 0.24, "grad_norm": 3.382518928898099, "learning_rate": 9.933972202263853e-06, "loss": 0.2716, "step": 882 }, { "epoch": 0.24, "grad_norm": 3.7159108465171986, "learning_rate": 9.933733284736588e-06, "loss": 0.2795, "step": 883 }, { "epoch": 0.24, "grad_norm": 2.8373461775254762, "learning_rate": 9.933493938618878e-06, "loss": 0.2504, "step": 884 }, { "epoch": 0.24, "grad_norm": 3.3390583871704256, "learning_rate": 9.933254163931512e-06, "loss": 0.2943, "step": 885 }, { "epoch": 0.24, "grad_norm": 3.2670928887377344, "learning_rate": 9.933013960695321e-06, "loss": 0.276, "step": 886 }, { "epoch": 0.24, "grad_norm": 3.3618465757078155, "learning_rate": 9.932773328931171e-06, "loss": 0.2638, "step": 887 }, { "epoch": 0.24, "grad_norm": 3.1766026470572344, "learning_rate": 9.932532268659966e-06, "loss": 0.277, "step": 888 }, { "epoch": 0.24, "grad_norm": 3.007161501151894, "learning_rate": 9.932290779902648e-06, "loss": 0.2496, "step": 889 }, { "epoch": 0.24, "grad_norm": 3.3360649173645647, "learning_rate": 9.93204886268019e-06, "loss": 0.2541, "step": 890 }, { "epoch": 0.24, "grad_norm": 3.5294014099423774, "learning_rate": 9.931806517013612e-06, "loss": 0.3163, "step": 891 }, { "epoch": 0.24, "grad_norm": 3.7764181028997834, "learning_rate": 9.931563742923967e-06, "loss": 0.3239, "step": 892 }, { "epoch": 0.24, "grad_norm": 3.4927472302130713, "learning_rate": 9.931320540432339e-06, "loss": 0.3105, "step": 893 }, { "epoch": 0.24, "grad_norm": 2.912071475306121, "learning_rate": 9.93107690955986e-06, "loss": 0.2616, "step": 894 }, { "epoch": 0.24, "grad_norm": 2.8986147938294793, "learning_rate": 9.930832850327693e-06, "loss": 0.2512, "step": 895 }, { "epoch": 0.24, "grad_norm": 2.958875753553084, "learning_rate": 9.930588362757038e-06, "loss": 0.2461, "step": 896 }, { "epoch": 0.24, "grad_norm": 3.580466817979422, "learning_rate": 9.930343446869134e-06, "loss": 0.2555, "step": 897 }, { "epoch": 0.25, "grad_norm": 3.063186430828628, "learning_rate": 9.93009810268526e-06, "loss": 0.297, "step": 898 }, { "epoch": 0.25, "grad_norm": 3.134047207459698, "learning_rate": 9.929852330226723e-06, "loss": 0.2636, "step": 899 }, { "epoch": 0.25, "grad_norm": 4.242664574250869, "learning_rate": 9.929606129514875e-06, "loss": 0.2932, "step": 900 }, { "epoch": 0.25, "grad_norm": 3.3303198387606514, "learning_rate": 9.929359500571108e-06, "loss": 0.2545, "step": 901 }, { "epoch": 0.25, "grad_norm": 3.865057469300579, "learning_rate": 9.92911244341684e-06, "loss": 0.3238, "step": 902 }, { "epoch": 0.25, "grad_norm": 3.2447404443038073, "learning_rate": 9.928864958073536e-06, "loss": 0.2332, "step": 903 }, { "epoch": 0.25, "grad_norm": 3.0135428087406364, "learning_rate": 9.928617044562695e-06, "loss": 0.2314, "step": 904 }, { "epoch": 0.25, "grad_norm": 3.617340165307831, "learning_rate": 9.92836870290585e-06, "loss": 0.3012, "step": 905 }, { "epoch": 0.25, "grad_norm": 3.47175086119539, "learning_rate": 9.92811993312458e-06, "loss": 0.2722, "step": 906 }, { "epoch": 0.25, "grad_norm": 3.1705522076845174, "learning_rate": 9.927870735240492e-06, "loss": 0.2815, "step": 907 }, { "epoch": 0.25, "grad_norm": 3.0077622773513553, "learning_rate": 9.927621109275233e-06, "loss": 0.2663, "step": 908 }, { "epoch": 0.25, "grad_norm": 3.2973207485974556, "learning_rate": 9.927371055250489e-06, "loss": 0.2544, "step": 909 }, { "epoch": 0.25, "grad_norm": 3.1272857794777784, "learning_rate": 9.927120573187981e-06, "loss": 0.2598, "step": 910 }, { "epoch": 0.25, "grad_norm": 3.4522236094285303, "learning_rate": 9.92686966310947e-06, "loss": 0.283, "step": 911 }, { "epoch": 0.25, "grad_norm": 2.9510192069679353, "learning_rate": 9.926618325036752e-06, "loss": 0.2777, "step": 912 }, { "epoch": 0.25, "grad_norm": 3.657729761965793, "learning_rate": 9.926366558991659e-06, "loss": 0.2584, "step": 913 }, { "epoch": 0.25, "grad_norm": 3.065916477202066, "learning_rate": 9.926114364996063e-06, "loss": 0.2407, "step": 914 }, { "epoch": 0.25, "grad_norm": 3.082148895459199, "learning_rate": 9.925861743071872e-06, "loss": 0.2642, "step": 915 }, { "epoch": 0.25, "grad_norm": 3.386573938151328, "learning_rate": 9.925608693241031e-06, "loss": 0.2665, "step": 916 }, { "epoch": 0.25, "grad_norm": 3.262545697249871, "learning_rate": 9.925355215525523e-06, "loss": 0.2707, "step": 917 }, { "epoch": 0.25, "grad_norm": 2.953267251908424, "learning_rate": 9.925101309947365e-06, "loss": 0.2235, "step": 918 }, { "epoch": 0.25, "grad_norm": 3.2239983903259875, "learning_rate": 9.924846976528618e-06, "loss": 0.26, "step": 919 }, { "epoch": 0.25, "grad_norm": 3.6974143702568694, "learning_rate": 9.924592215291368e-06, "loss": 0.2403, "step": 920 }, { "epoch": 0.25, "grad_norm": 3.35068535670828, "learning_rate": 9.924337026257756e-06, "loss": 0.2523, "step": 921 }, { "epoch": 0.25, "grad_norm": 3.221139260263104, "learning_rate": 9.924081409449943e-06, "loss": 0.2583, "step": 922 }, { "epoch": 0.25, "grad_norm": 3.5040974384723786, "learning_rate": 9.923825364890137e-06, "loss": 0.2846, "step": 923 }, { "epoch": 0.25, "grad_norm": 2.9705703472482505, "learning_rate": 9.923568892600579e-06, "loss": 0.2721, "step": 924 }, { "epoch": 0.25, "grad_norm": 3.1974979222629516, "learning_rate": 9.92331199260355e-06, "loss": 0.2918, "step": 925 }, { "epoch": 0.25, "grad_norm": 3.2392929177127923, "learning_rate": 9.923054664921366e-06, "loss": 0.2799, "step": 926 }, { "epoch": 0.25, "grad_norm": 3.3223309521614284, "learning_rate": 9.922796909576383e-06, "loss": 0.2851, "step": 927 }, { "epoch": 0.25, "grad_norm": 3.002716741800669, "learning_rate": 9.922538726590987e-06, "loss": 0.3056, "step": 928 }, { "epoch": 0.25, "grad_norm": 3.226612579952788, "learning_rate": 9.92228011598761e-06, "loss": 0.2518, "step": 929 }, { "epoch": 0.25, "grad_norm": 3.0239036497117975, "learning_rate": 9.922021077788717e-06, "loss": 0.2408, "step": 930 }, { "epoch": 0.25, "grad_norm": 3.1996834708257884, "learning_rate": 9.92176161201681e-06, "loss": 0.2859, "step": 931 }, { "epoch": 0.25, "grad_norm": 3.1060558343210904, "learning_rate": 9.921501718694431e-06, "loss": 0.2698, "step": 932 }, { "epoch": 0.25, "grad_norm": 3.466907669704993, "learning_rate": 9.921241397844153e-06, "loss": 0.2788, "step": 933 }, { "epoch": 0.25, "grad_norm": 3.2084457047284394, "learning_rate": 9.920980649488591e-06, "loss": 0.2642, "step": 934 }, { "epoch": 0.26, "grad_norm": 3.279648066311588, "learning_rate": 9.920719473650397e-06, "loss": 0.2792, "step": 935 }, { "epoch": 0.26, "grad_norm": 3.106194111971018, "learning_rate": 9.920457870352259e-06, "loss": 0.2321, "step": 936 }, { "epoch": 0.26, "grad_norm": 3.1934582290410654, "learning_rate": 9.920195839616901e-06, "loss": 0.2803, "step": 937 }, { "epoch": 0.26, "grad_norm": 3.292074374731251, "learning_rate": 9.919933381467088e-06, "loss": 0.2687, "step": 938 }, { "epoch": 0.26, "grad_norm": 3.1994067582994976, "learning_rate": 9.919670495925618e-06, "loss": 0.2738, "step": 939 }, { "epoch": 0.26, "grad_norm": 3.415954983887142, "learning_rate": 9.919407183015327e-06, "loss": 0.2858, "step": 940 }, { "epoch": 0.26, "grad_norm": 3.3381735172147082, "learning_rate": 9.91914344275909e-06, "loss": 0.2781, "step": 941 }, { "epoch": 0.26, "grad_norm": 3.61713607697877, "learning_rate": 9.918879275179819e-06, "loss": 0.2608, "step": 942 }, { "epoch": 0.26, "grad_norm": 3.2805387980907814, "learning_rate": 9.918614680300458e-06, "loss": 0.296, "step": 943 }, { "epoch": 0.26, "grad_norm": 3.2086112749813966, "learning_rate": 9.918349658143997e-06, "loss": 0.277, "step": 944 }, { "epoch": 0.26, "grad_norm": 3.1151483528131627, "learning_rate": 9.918084208733454e-06, "loss": 0.2805, "step": 945 }, { "epoch": 0.26, "grad_norm": 2.903697337675332, "learning_rate": 9.917818332091892e-06, "loss": 0.2662, "step": 946 }, { "epoch": 0.26, "grad_norm": 2.9618248080189367, "learning_rate": 9.917552028242406e-06, "loss": 0.2387, "step": 947 }, { "epoch": 0.26, "grad_norm": 2.6805600533646814, "learning_rate": 9.91728529720813e-06, "loss": 0.234, "step": 948 }, { "epoch": 0.26, "grad_norm": 3.252167376273173, "learning_rate": 9.917018139012236e-06, "loss": 0.2739, "step": 949 }, { "epoch": 0.26, "grad_norm": 2.706046767878392, "learning_rate": 9.916750553677929e-06, "loss": 0.2392, "step": 950 }, { "epoch": 0.26, "grad_norm": 3.35702234988839, "learning_rate": 9.916482541228456e-06, "loss": 0.3126, "step": 951 }, { "epoch": 0.26, "grad_norm": 3.2029297541061816, "learning_rate": 9.916214101687096e-06, "loss": 0.2804, "step": 952 }, { "epoch": 0.26, "grad_norm": 3.0002871366757327, "learning_rate": 9.915945235077173e-06, "loss": 0.2496, "step": 953 }, { "epoch": 0.26, "grad_norm": 2.5733977270923636, "learning_rate": 9.915675941422042e-06, "loss": 0.2079, "step": 954 }, { "epoch": 0.26, "grad_norm": 3.4243578854293064, "learning_rate": 9.915406220745093e-06, "loss": 0.3017, "step": 955 }, { "epoch": 0.26, "grad_norm": 2.9458666508721474, "learning_rate": 9.915136073069759e-06, "loss": 0.2417, "step": 956 }, { "epoch": 0.26, "grad_norm": 3.0923961214081066, "learning_rate": 9.91486549841951e-06, "loss": 0.2454, "step": 957 }, { "epoch": 0.26, "grad_norm": 3.4901091314863693, "learning_rate": 9.914594496817846e-06, "loss": 0.2728, "step": 958 }, { "epoch": 0.26, "grad_norm": 2.9784470235241427, "learning_rate": 9.914323068288312e-06, "loss": 0.252, "step": 959 }, { "epoch": 0.26, "grad_norm": 3.0681060236739826, "learning_rate": 9.914051212854484e-06, "loss": 0.2962, "step": 960 }, { "epoch": 0.26, "grad_norm": 2.7016575028420533, "learning_rate": 9.91377893053998e-06, "loss": 0.2126, "step": 961 }, { "epoch": 0.26, "grad_norm": 3.2050958677541117, "learning_rate": 9.913506221368455e-06, "loss": 0.2445, "step": 962 }, { "epoch": 0.26, "grad_norm": 3.126204998663041, "learning_rate": 9.913233085363595e-06, "loss": 0.2272, "step": 963 }, { "epoch": 0.26, "grad_norm": 3.3519564433198252, "learning_rate": 9.912959522549126e-06, "loss": 0.2455, "step": 964 }, { "epoch": 0.26, "grad_norm": 3.2199417712369023, "learning_rate": 9.912685532948819e-06, "loss": 0.234, "step": 965 }, { "epoch": 0.26, "grad_norm": 3.468102420508789, "learning_rate": 9.912411116586469e-06, "loss": 0.31, "step": 966 }, { "epoch": 0.26, "grad_norm": 3.2211199656799425, "learning_rate": 9.912136273485917e-06, "loss": 0.2797, "step": 967 }, { "epoch": 0.26, "grad_norm": 3.046776038463699, "learning_rate": 9.91186100367104e-06, "loss": 0.2551, "step": 968 }, { "epoch": 0.26, "grad_norm": 3.0177188372682866, "learning_rate": 9.911585307165747e-06, "loss": 0.2377, "step": 969 }, { "epoch": 0.26, "grad_norm": 3.0982130995189543, "learning_rate": 9.911309183993988e-06, "loss": 0.2672, "step": 970 }, { "epoch": 0.27, "grad_norm": 2.8651599495662263, "learning_rate": 9.911032634179754e-06, "loss": 0.2588, "step": 971 }, { "epoch": 0.27, "grad_norm": 3.185402106015171, "learning_rate": 9.910755657747064e-06, "loss": 0.279, "step": 972 }, { "epoch": 0.27, "grad_norm": 3.014181446254031, "learning_rate": 9.910478254719983e-06, "loss": 0.2432, "step": 973 }, { "epoch": 0.27, "grad_norm": 3.501906145237206, "learning_rate": 9.910200425122603e-06, "loss": 0.3076, "step": 974 }, { "epoch": 0.27, "grad_norm": 3.353380746290943, "learning_rate": 9.909922168979063e-06, "loss": 0.2716, "step": 975 }, { "epoch": 0.27, "grad_norm": 3.4491161780728565, "learning_rate": 9.909643486313533e-06, "loss": 0.3122, "step": 976 }, { "epoch": 0.27, "grad_norm": 3.1748124863854725, "learning_rate": 9.909364377150226e-06, "loss": 0.2623, "step": 977 }, { "epoch": 0.27, "grad_norm": 3.1485172269620842, "learning_rate": 9.909084841513383e-06, "loss": 0.2303, "step": 978 }, { "epoch": 0.27, "grad_norm": 3.003349431867429, "learning_rate": 9.90880487942729e-06, "loss": 0.2332, "step": 979 }, { "epoch": 0.27, "grad_norm": 3.2354365471636934, "learning_rate": 9.908524490916267e-06, "loss": 0.2516, "step": 980 }, { "epoch": 0.27, "grad_norm": 3.234736477686663, "learning_rate": 9.90824367600467e-06, "loss": 0.2754, "step": 981 }, { "epoch": 0.27, "grad_norm": 4.069791878099338, "learning_rate": 9.907962434716894e-06, "loss": 0.2719, "step": 982 }, { "epoch": 0.27, "grad_norm": 2.967540685989675, "learning_rate": 9.90768076707737e-06, "loss": 0.2356, "step": 983 }, { "epoch": 0.27, "grad_norm": 3.565257987898602, "learning_rate": 9.907398673110565e-06, "loss": 0.3098, "step": 984 }, { "epoch": 0.27, "grad_norm": 3.409556460052405, "learning_rate": 9.907116152840987e-06, "loss": 0.2963, "step": 985 }, { "epoch": 0.27, "grad_norm": 3.0627131576161557, "learning_rate": 9.906833206293177e-06, "loss": 0.2614, "step": 986 }, { "epoch": 0.27, "grad_norm": 3.143487194763398, "learning_rate": 9.906549833491714e-06, "loss": 0.2304, "step": 987 }, { "epoch": 0.27, "grad_norm": 3.525379915086094, "learning_rate": 9.906266034461216e-06, "loss": 0.2717, "step": 988 }, { "epoch": 0.27, "grad_norm": 3.2134697424710996, "learning_rate": 9.905981809226334e-06, "loss": 0.2426, "step": 989 }, { "epoch": 0.27, "grad_norm": 3.4192533850413236, "learning_rate": 9.905697157811761e-06, "loss": 0.2644, "step": 990 }, { "epoch": 0.27, "grad_norm": 3.5101120655373146, "learning_rate": 9.905412080242222e-06, "loss": 0.2596, "step": 991 }, { "epoch": 0.27, "grad_norm": 3.3518818908320664, "learning_rate": 9.905126576542485e-06, "loss": 0.2846, "step": 992 }, { "epoch": 0.27, "grad_norm": 4.108242466710249, "learning_rate": 9.904840646737346e-06, "loss": 0.2566, "step": 993 }, { "epoch": 0.27, "grad_norm": 3.253718175470579, "learning_rate": 9.904554290851648e-06, "loss": 0.2855, "step": 994 }, { "epoch": 0.27, "grad_norm": 3.172997631569075, "learning_rate": 9.904267508910269e-06, "loss": 0.2631, "step": 995 }, { "epoch": 0.27, "grad_norm": 3.304507528666179, "learning_rate": 9.903980300938115e-06, "loss": 0.2742, "step": 996 }, { "epoch": 0.27, "grad_norm": 2.928623139741482, "learning_rate": 9.903692666960139e-06, "loss": 0.2354, "step": 997 }, { "epoch": 0.27, "grad_norm": 3.217885043210312, "learning_rate": 9.903404607001325e-06, "loss": 0.2561, "step": 998 }, { "epoch": 0.27, "grad_norm": 3.1953572178467553, "learning_rate": 9.903116121086703e-06, "loss": 0.2629, "step": 999 }, { "epoch": 0.27, "grad_norm": 3.1947480136099347, "learning_rate": 9.902827209241326e-06, "loss": 0.249, "step": 1000 }, { "epoch": 0.27, "grad_norm": 3.3561782992489233, "learning_rate": 9.902537871490297e-06, "loss": 0.2547, "step": 1001 }, { "epoch": 0.27, "grad_norm": 3.8556978096661165, "learning_rate": 9.902248107858747e-06, "loss": 0.3023, "step": 1002 }, { "epoch": 0.27, "grad_norm": 2.935430286506648, "learning_rate": 9.901957918371851e-06, "loss": 0.2313, "step": 1003 }, { "epoch": 0.27, "grad_norm": 3.257278084056097, "learning_rate": 9.901667303054814e-06, "loss": 0.2312, "step": 1004 }, { "epoch": 0.27, "grad_norm": 3.2722066456768424, "learning_rate": 9.901376261932885e-06, "loss": 0.29, "step": 1005 }, { "epoch": 0.27, "grad_norm": 3.5281381534322533, "learning_rate": 9.901084795031344e-06, "loss": 0.267, "step": 1006 }, { "epoch": 0.27, "grad_norm": 3.193621687136732, "learning_rate": 9.900792902375512e-06, "loss": 0.2699, "step": 1007 }, { "epoch": 0.28, "grad_norm": 3.7455931938218647, "learning_rate": 9.900500583990744e-06, "loss": 0.2959, "step": 1008 }, { "epoch": 0.28, "grad_norm": 3.613484087345668, "learning_rate": 9.900207839902436e-06, "loss": 0.2481, "step": 1009 }, { "epoch": 0.28, "grad_norm": 3.0810597686054075, "learning_rate": 9.899914670136016e-06, "loss": 0.2623, "step": 1010 }, { "epoch": 0.28, "grad_norm": 2.8907600619594698, "learning_rate": 9.899621074716954e-06, "loss": 0.2483, "step": 1011 }, { "epoch": 0.28, "grad_norm": 3.0890777839868737, "learning_rate": 9.899327053670751e-06, "loss": 0.228, "step": 1012 }, { "epoch": 0.28, "grad_norm": 2.9832103393375506, "learning_rate": 9.899032607022952e-06, "loss": 0.2415, "step": 1013 }, { "epoch": 0.28, "grad_norm": 3.4087419607000222, "learning_rate": 9.898737734799134e-06, "loss": 0.2566, "step": 1014 }, { "epoch": 0.28, "grad_norm": 3.3485029500827044, "learning_rate": 9.89844243702491e-06, "loss": 0.2733, "step": 1015 }, { "epoch": 0.28, "grad_norm": 2.799907698810353, "learning_rate": 9.898146713725937e-06, "loss": 0.2219, "step": 1016 }, { "epoch": 0.28, "grad_norm": 3.306839757064171, "learning_rate": 9.8978505649279e-06, "loss": 0.2681, "step": 1017 }, { "epoch": 0.28, "grad_norm": 3.2337710986801267, "learning_rate": 9.897553990656528e-06, "loss": 0.2703, "step": 1018 }, { "epoch": 0.28, "grad_norm": 4.757213785073275, "learning_rate": 9.897256990937583e-06, "loss": 0.3208, "step": 1019 }, { "epoch": 0.28, "grad_norm": 3.0881498453707765, "learning_rate": 9.896959565796865e-06, "loss": 0.2288, "step": 1020 }, { "epoch": 0.28, "grad_norm": 3.033413308459793, "learning_rate": 9.896661715260213e-06, "loss": 0.2553, "step": 1021 }, { "epoch": 0.28, "grad_norm": 3.4432396085433408, "learning_rate": 9.896363439353499e-06, "loss": 0.2794, "step": 1022 }, { "epoch": 0.28, "grad_norm": 3.1218470624167893, "learning_rate": 9.896064738102635e-06, "loss": 0.2652, "step": 1023 }, { "epoch": 0.28, "grad_norm": 3.3103158980156056, "learning_rate": 9.895765611533568e-06, "loss": 0.2933, "step": 1024 }, { "epoch": 0.28, "grad_norm": 3.40077800866619, "learning_rate": 9.895466059672284e-06, "loss": 0.2672, "step": 1025 }, { "epoch": 0.28, "grad_norm": 3.1978108022936005, "learning_rate": 9.895166082544807e-06, "loss": 0.2422, "step": 1026 }, { "epoch": 0.28, "grad_norm": 3.176225234842347, "learning_rate": 9.89486568017719e-06, "loss": 0.2486, "step": 1027 }, { "epoch": 0.28, "grad_norm": 3.139861385729894, "learning_rate": 9.894564852595535e-06, "loss": 0.2706, "step": 1028 }, { "epoch": 0.28, "grad_norm": 3.024639904274534, "learning_rate": 9.89426359982597e-06, "loss": 0.2248, "step": 1029 }, { "epoch": 0.28, "grad_norm": 3.0409580415425523, "learning_rate": 9.893961921894668e-06, "loss": 0.2446, "step": 1030 }, { "epoch": 0.28, "grad_norm": 29.481094777662676, "learning_rate": 9.893659818827834e-06, "loss": 0.2903, "step": 1031 }, { "epoch": 0.28, "grad_norm": 3.3411641832779577, "learning_rate": 9.893357290651712e-06, "loss": 0.2682, "step": 1032 }, { "epoch": 0.28, "grad_norm": 3.2154849374842884, "learning_rate": 9.89305433739258e-06, "loss": 0.2557, "step": 1033 }, { "epoch": 0.28, "grad_norm": 3.282477206424088, "learning_rate": 9.89275095907676e-06, "loss": 0.2586, "step": 1034 }, { "epoch": 0.28, "grad_norm": 3.3687299584791632, "learning_rate": 9.892447155730603e-06, "loss": 0.2851, "step": 1035 }, { "epoch": 0.28, "grad_norm": 3.11629746597544, "learning_rate": 9.892142927380502e-06, "loss": 0.2544, "step": 1036 }, { "epoch": 0.28, "grad_norm": 3.23760662814848, "learning_rate": 9.891838274052882e-06, "loss": 0.2473, "step": 1037 }, { "epoch": 0.28, "grad_norm": 3.2476169173451708, "learning_rate": 9.89153319577421e-06, "loss": 0.2561, "step": 1038 }, { "epoch": 0.28, "grad_norm": 3.258902731779671, "learning_rate": 9.89122769257099e-06, "loss": 0.3059, "step": 1039 }, { "epoch": 0.28, "grad_norm": 4.652080752292637, "learning_rate": 9.890921764469759e-06, "loss": 0.2743, "step": 1040 }, { "epoch": 0.28, "grad_norm": 4.452074246162822, "learning_rate": 9.890615411497094e-06, "loss": 0.2742, "step": 1041 }, { "epoch": 0.28, "grad_norm": 4.000403217156622, "learning_rate": 9.890308633679604e-06, "loss": 0.2455, "step": 1042 }, { "epoch": 0.28, "grad_norm": 8.19328378948334, "learning_rate": 9.890001431043941e-06, "loss": 0.2535, "step": 1043 }, { "epoch": 0.29, "grad_norm": 3.4545661876685605, "learning_rate": 9.889693803616793e-06, "loss": 0.267, "step": 1044 }, { "epoch": 0.29, "grad_norm": 3.8058250540756426, "learning_rate": 9.889385751424882e-06, "loss": 0.281, "step": 1045 }, { "epoch": 0.29, "grad_norm": 19.546375324065433, "learning_rate": 9.889077274494967e-06, "loss": 0.233, "step": 1046 }, { "epoch": 0.29, "grad_norm": 2.9125999345132505, "learning_rate": 9.888768372853849e-06, "loss": 0.2319, "step": 1047 }, { "epoch": 0.29, "grad_norm": 13.01413418160032, "learning_rate": 9.888459046528358e-06, "loss": 0.2656, "step": 1048 }, { "epoch": 0.29, "grad_norm": 3.6235588986227585, "learning_rate": 9.888149295545367e-06, "loss": 0.221, "step": 1049 }, { "epoch": 0.29, "grad_norm": 3.388174311186669, "learning_rate": 9.887839119931783e-06, "loss": 0.2877, "step": 1050 }, { "epoch": 0.29, "grad_norm": 3.25830270365746, "learning_rate": 9.887528519714554e-06, "loss": 0.2691, "step": 1051 }, { "epoch": 0.29, "grad_norm": 4.146037450476557, "learning_rate": 9.887217494920655e-06, "loss": 0.274, "step": 1052 }, { "epoch": 0.29, "grad_norm": 3.4675280873830836, "learning_rate": 9.886906045577111e-06, "loss": 0.2902, "step": 1053 }, { "epoch": 0.29, "grad_norm": 3.5954757774160786, "learning_rate": 9.886594171710975e-06, "loss": 0.252, "step": 1054 }, { "epoch": 0.29, "grad_norm": 3.1538211304333394, "learning_rate": 9.886281873349338e-06, "loss": 0.2627, "step": 1055 }, { "epoch": 0.29, "grad_norm": 3.011755539575295, "learning_rate": 9.885969150519332e-06, "loss": 0.2458, "step": 1056 }, { "epoch": 0.29, "grad_norm": 3.434800138844234, "learning_rate": 9.88565600324812e-06, "loss": 0.2551, "step": 1057 }, { "epoch": 0.29, "grad_norm": 3.224425124524942, "learning_rate": 9.885342431562907e-06, "loss": 0.2516, "step": 1058 }, { "epoch": 0.29, "grad_norm": 3.0796506590369255, "learning_rate": 9.88502843549093e-06, "loss": 0.23, "step": 1059 }, { "epoch": 0.29, "grad_norm": 3.59875495565925, "learning_rate": 9.884714015059472e-06, "loss": 0.27, "step": 1060 }, { "epoch": 0.29, "grad_norm": 3.446816424246436, "learning_rate": 9.884399170295839e-06, "loss": 0.2758, "step": 1061 }, { "epoch": 0.29, "grad_norm": 3.6709401080186677, "learning_rate": 9.884083901227387e-06, "loss": 0.276, "step": 1062 }, { "epoch": 0.29, "grad_norm": 3.348570107464168, "learning_rate": 9.883768207881498e-06, "loss": 0.2543, "step": 1063 }, { "epoch": 0.29, "grad_norm": 3.323242320209527, "learning_rate": 9.8834520902856e-06, "loss": 0.2809, "step": 1064 }, { "epoch": 0.29, "grad_norm": 3.2254075723025024, "learning_rate": 9.883135548467155e-06, "loss": 0.2396, "step": 1065 }, { "epoch": 0.29, "grad_norm": 3.234494384024653, "learning_rate": 9.882818582453657e-06, "loss": 0.2462, "step": 1066 }, { "epoch": 0.29, "grad_norm": 3.0802528203355988, "learning_rate": 9.882501192272642e-06, "loss": 0.225, "step": 1067 }, { "epoch": 0.29, "grad_norm": 3.235901460182486, "learning_rate": 9.882183377951683e-06, "loss": 0.2901, "step": 1068 }, { "epoch": 0.29, "grad_norm": 3.3730989499581585, "learning_rate": 9.881865139518387e-06, "loss": 0.2823, "step": 1069 }, { "epoch": 0.29, "grad_norm": 3.2380187995918095, "learning_rate": 9.8815464770004e-06, "loss": 0.2252, "step": 1070 }, { "epoch": 0.29, "grad_norm": 2.9575063275415405, "learning_rate": 9.881227390425404e-06, "loss": 0.2126, "step": 1071 }, { "epoch": 0.29, "grad_norm": 3.367039932864783, "learning_rate": 9.880907879821115e-06, "loss": 0.2389, "step": 1072 }, { "epoch": 0.29, "grad_norm": 3.403928509546694, "learning_rate": 9.880587945215292e-06, "loss": 0.3178, "step": 1073 }, { "epoch": 0.29, "grad_norm": 3.124601210349124, "learning_rate": 9.880267586635726e-06, "loss": 0.2979, "step": 1074 }, { "epoch": 0.29, "grad_norm": 3.2124308545267084, "learning_rate": 9.879946804110248e-06, "loss": 0.2806, "step": 1075 }, { "epoch": 0.29, "grad_norm": 3.2247892737126724, "learning_rate": 9.879625597666721e-06, "loss": 0.226, "step": 1076 }, { "epoch": 0.29, "grad_norm": 3.072346495670894, "learning_rate": 9.879303967333053e-06, "loss": 0.2681, "step": 1077 }, { "epoch": 0.29, "grad_norm": 3.0390647081716544, "learning_rate": 9.878981913137178e-06, "loss": 0.2446, "step": 1078 }, { "epoch": 0.29, "grad_norm": 2.8096996690715965, "learning_rate": 9.878659435107078e-06, "loss": 0.1933, "step": 1079 }, { "epoch": 0.29, "grad_norm": 3.2478314529989514, "learning_rate": 9.878336533270763e-06, "loss": 0.2477, "step": 1080 }, { "epoch": 0.3, "grad_norm": 2.8966634683247876, "learning_rate": 9.878013207656285e-06, "loss": 0.2187, "step": 1081 }, { "epoch": 0.3, "grad_norm": 2.9379312569246974, "learning_rate": 9.87768945829173e-06, "loss": 0.2627, "step": 1082 }, { "epoch": 0.3, "grad_norm": 3.247422948684305, "learning_rate": 9.87736528520522e-06, "loss": 0.2769, "step": 1083 }, { "epoch": 0.3, "grad_norm": 3.145370943131256, "learning_rate": 9.877040688424922e-06, "loss": 0.2742, "step": 1084 }, { "epoch": 0.3, "grad_norm": 2.9207543622331165, "learning_rate": 9.876715667979026e-06, "loss": 0.2456, "step": 1085 }, { "epoch": 0.3, "grad_norm": 2.9626672840173347, "learning_rate": 9.876390223895774e-06, "loss": 0.2368, "step": 1086 }, { "epoch": 0.3, "grad_norm": 3.0740793830612394, "learning_rate": 9.87606435620343e-06, "loss": 0.2419, "step": 1087 }, { "epoch": 0.3, "grad_norm": 3.202432612698249, "learning_rate": 9.875738064930305e-06, "loss": 0.2533, "step": 1088 }, { "epoch": 0.3, "grad_norm": 3.068348943939479, "learning_rate": 9.875411350104745e-06, "loss": 0.2431, "step": 1089 }, { "epoch": 0.3, "grad_norm": 2.7994529309050415, "learning_rate": 9.875084211755127e-06, "loss": 0.273, "step": 1090 }, { "epoch": 0.3, "grad_norm": 3.094993820474425, "learning_rate": 9.874756649909877e-06, "loss": 0.2552, "step": 1091 }, { "epoch": 0.3, "grad_norm": 3.421990053116304, "learning_rate": 9.874428664597444e-06, "loss": 0.2584, "step": 1092 }, { "epoch": 0.3, "grad_norm": 2.6811195491293667, "learning_rate": 9.874100255846321e-06, "loss": 0.2061, "step": 1093 }, { "epoch": 0.3, "grad_norm": 3.5105282456220395, "learning_rate": 9.873771423685037e-06, "loss": 0.2912, "step": 1094 }, { "epoch": 0.3, "grad_norm": 3.1829499290409946, "learning_rate": 9.873442168142158e-06, "loss": 0.2651, "step": 1095 }, { "epoch": 0.3, "grad_norm": 2.8293048018507916, "learning_rate": 9.873112489246286e-06, "loss": 0.2112, "step": 1096 }, { "epoch": 0.3, "grad_norm": 2.9748134768529795, "learning_rate": 9.872782387026061e-06, "loss": 0.2396, "step": 1097 }, { "epoch": 0.3, "grad_norm": 3.0003203104271097, "learning_rate": 9.872451861510157e-06, "loss": 0.2598, "step": 1098 }, { "epoch": 0.3, "grad_norm": 3.328814951343707, "learning_rate": 9.872120912727286e-06, "loss": 0.2867, "step": 1099 }, { "epoch": 0.3, "grad_norm": 2.6764398092307036, "learning_rate": 9.8717895407062e-06, "loss": 0.2308, "step": 1100 }, { "epoch": 0.3, "grad_norm": 3.1102285081738597, "learning_rate": 9.871457745475682e-06, "loss": 0.2513, "step": 1101 }, { "epoch": 0.3, "grad_norm": 3.474952823633108, "learning_rate": 9.871125527064559e-06, "loss": 0.3014, "step": 1102 }, { "epoch": 0.3, "grad_norm": 3.1945482815235877, "learning_rate": 9.870792885501686e-06, "loss": 0.2602, "step": 1103 }, { "epoch": 0.3, "grad_norm": 2.7806017922969644, "learning_rate": 9.87045982081596e-06, "loss": 0.2249, "step": 1104 }, { "epoch": 0.3, "grad_norm": 2.889373640351625, "learning_rate": 9.870126333036318e-06, "loss": 0.2212, "step": 1105 }, { "epoch": 0.3, "grad_norm": 3.137399443034838, "learning_rate": 9.869792422191727e-06, "loss": 0.2845, "step": 1106 }, { "epoch": 0.3, "grad_norm": 3.5451088105403756, "learning_rate": 9.869458088311195e-06, "loss": 0.2891, "step": 1107 }, { "epoch": 0.3, "grad_norm": 2.8611552367886093, "learning_rate": 9.869123331423763e-06, "loss": 0.2412, "step": 1108 }, { "epoch": 0.3, "grad_norm": 3.139798328264714, "learning_rate": 9.868788151558513e-06, "loss": 0.2739, "step": 1109 }, { "epoch": 0.3, "grad_norm": 3.120058465279173, "learning_rate": 9.868452548744563e-06, "loss": 0.2506, "step": 1110 }, { "epoch": 0.3, "grad_norm": 3.4306934130748865, "learning_rate": 9.868116523011063e-06, "loss": 0.2798, "step": 1111 }, { "epoch": 0.3, "grad_norm": 3.0094312404190973, "learning_rate": 9.867780074387207e-06, "loss": 0.2247, "step": 1112 }, { "epoch": 0.3, "grad_norm": 3.121912789875263, "learning_rate": 9.86744320290222e-06, "loss": 0.2532, "step": 1113 }, { "epoch": 0.3, "grad_norm": 2.8532544937814346, "learning_rate": 9.867105908585366e-06, "loss": 0.2292, "step": 1114 }, { "epoch": 0.3, "grad_norm": 2.8893076443611174, "learning_rate": 9.866768191465946e-06, "loss": 0.1885, "step": 1115 }, { "epoch": 0.3, "grad_norm": 3.3440291657838146, "learning_rate": 9.866430051573296e-06, "loss": 0.2732, "step": 1116 }, { "epoch": 0.3, "grad_norm": 2.9496134563799656, "learning_rate": 9.866091488936795e-06, "loss": 0.2683, "step": 1117 }, { "epoch": 0.31, "grad_norm": 3.0244362098680764, "learning_rate": 9.865752503585848e-06, "loss": 0.2496, "step": 1118 }, { "epoch": 0.31, "grad_norm": 3.3000625365787393, "learning_rate": 9.865413095549903e-06, "loss": 0.2937, "step": 1119 }, { "epoch": 0.31, "grad_norm": 3.2077587746808645, "learning_rate": 9.865073264858447e-06, "loss": 0.2792, "step": 1120 }, { "epoch": 0.31, "grad_norm": 2.880856128571587, "learning_rate": 9.864733011541e-06, "loss": 0.2744, "step": 1121 }, { "epoch": 0.31, "grad_norm": 3.4214701914593606, "learning_rate": 9.864392335627118e-06, "loss": 0.2405, "step": 1122 }, { "epoch": 0.31, "grad_norm": 2.7103203951537567, "learning_rate": 9.864051237146395e-06, "loss": 0.2235, "step": 1123 }, { "epoch": 0.31, "grad_norm": 2.946075951602688, "learning_rate": 9.863709716128465e-06, "loss": 0.2324, "step": 1124 }, { "epoch": 0.31, "grad_norm": 2.937384906212223, "learning_rate": 9.863367772602994e-06, "loss": 0.2448, "step": 1125 }, { "epoch": 0.31, "grad_norm": 3.173508622376695, "learning_rate": 9.863025406599686e-06, "loss": 0.2401, "step": 1126 }, { "epoch": 0.31, "grad_norm": 2.74269730960683, "learning_rate": 9.862682618148286e-06, "loss": 0.2584, "step": 1127 }, { "epoch": 0.31, "grad_norm": 2.993651776792849, "learning_rate": 9.862339407278564e-06, "loss": 0.2663, "step": 1128 }, { "epoch": 0.31, "grad_norm": 3.2336638887277633, "learning_rate": 9.861995774020341e-06, "loss": 0.2485, "step": 1129 }, { "epoch": 0.31, "grad_norm": 2.873204241018341, "learning_rate": 9.861651718403466e-06, "loss": 0.2262, "step": 1130 }, { "epoch": 0.31, "grad_norm": 3.1366089316912515, "learning_rate": 9.861307240457828e-06, "loss": 0.2602, "step": 1131 }, { "epoch": 0.31, "grad_norm": 2.8291904007710147, "learning_rate": 9.86096234021335e-06, "loss": 0.2749, "step": 1132 }, { "epoch": 0.31, "grad_norm": 3.2039991518725994, "learning_rate": 9.860617017699993e-06, "loss": 0.2952, "step": 1133 }, { "epoch": 0.31, "grad_norm": 2.72618487498062, "learning_rate": 9.86027127294776e-06, "loss": 0.227, "step": 1134 }, { "epoch": 0.31, "grad_norm": 2.941846000529975, "learning_rate": 9.859925105986677e-06, "loss": 0.2487, "step": 1135 }, { "epoch": 0.31, "grad_norm": 3.5160975179268226, "learning_rate": 9.859578516846822e-06, "loss": 0.2442, "step": 1136 }, { "epoch": 0.31, "grad_norm": 2.9299841378485487, "learning_rate": 9.859231505558301e-06, "loss": 0.2535, "step": 1137 }, { "epoch": 0.31, "grad_norm": 3.124378700379731, "learning_rate": 9.858884072151258e-06, "loss": 0.2476, "step": 1138 }, { "epoch": 0.31, "grad_norm": 3.1642850268511196, "learning_rate": 9.858536216655875e-06, "loss": 0.2761, "step": 1139 }, { "epoch": 0.31, "grad_norm": 3.354830281022868, "learning_rate": 9.85818793910237e-06, "loss": 0.2496, "step": 1140 }, { "epoch": 0.31, "grad_norm": 2.9455152347080973, "learning_rate": 9.857839239521e-06, "loss": 0.2222, "step": 1141 }, { "epoch": 0.31, "grad_norm": 3.0777993632048695, "learning_rate": 9.85749011794205e-06, "loss": 0.2382, "step": 1142 }, { "epoch": 0.31, "grad_norm": 3.288589623666548, "learning_rate": 9.857140574395854e-06, "loss": 0.2385, "step": 1143 }, { "epoch": 0.31, "grad_norm": 3.0879987681864836, "learning_rate": 9.856790608912775e-06, "loss": 0.24, "step": 1144 }, { "epoch": 0.31, "grad_norm": 2.7891813505355767, "learning_rate": 9.856440221523211e-06, "loss": 0.2225, "step": 1145 }, { "epoch": 0.31, "grad_norm": 2.930758829384716, "learning_rate": 9.856089412257605e-06, "loss": 0.2209, "step": 1146 }, { "epoch": 0.31, "grad_norm": 2.9075032519743202, "learning_rate": 9.855738181146427e-06, "loss": 0.221, "step": 1147 }, { "epoch": 0.31, "grad_norm": 2.799038534038176, "learning_rate": 9.855386528220194e-06, "loss": 0.2453, "step": 1148 }, { "epoch": 0.31, "grad_norm": 3.031666778266753, "learning_rate": 9.855034453509449e-06, "loss": 0.2664, "step": 1149 }, { "epoch": 0.31, "grad_norm": 3.101983093152087, "learning_rate": 9.854681957044779e-06, "loss": 0.2506, "step": 1150 }, { "epoch": 0.31, "grad_norm": 3.538405884548164, "learning_rate": 9.854329038856802e-06, "loss": 0.295, "step": 1151 }, { "epoch": 0.31, "grad_norm": 2.9999293062537107, "learning_rate": 9.85397569897618e-06, "loss": 0.2355, "step": 1152 }, { "epoch": 0.31, "grad_norm": 3.0846498570996985, "learning_rate": 9.853621937433603e-06, "loss": 0.2286, "step": 1153 }, { "epoch": 0.32, "grad_norm": 3.0189315640607655, "learning_rate": 9.853267754259808e-06, "loss": 0.259, "step": 1154 }, { "epoch": 0.32, "grad_norm": 3.186436806117984, "learning_rate": 9.852913149485556e-06, "loss": 0.2426, "step": 1155 }, { "epoch": 0.32, "grad_norm": 2.9655259808621697, "learning_rate": 9.852558123141656e-06, "loss": 0.2479, "step": 1156 }, { "epoch": 0.32, "grad_norm": 3.3247150840206916, "learning_rate": 9.852202675258946e-06, "loss": 0.2721, "step": 1157 }, { "epoch": 0.32, "grad_norm": 3.2023669493726996, "learning_rate": 9.851846805868307e-06, "loss": 0.2846, "step": 1158 }, { "epoch": 0.32, "grad_norm": 3.972721170761225, "learning_rate": 9.851490515000648e-06, "loss": 0.2294, "step": 1159 }, { "epoch": 0.32, "grad_norm": 3.157474039139291, "learning_rate": 9.851133802686925e-06, "loss": 0.2699, "step": 1160 }, { "epoch": 0.32, "grad_norm": 3.2081711511612174, "learning_rate": 9.850776668958122e-06, "loss": 0.2648, "step": 1161 }, { "epoch": 0.32, "grad_norm": 2.87312505864339, "learning_rate": 9.850419113845265e-06, "loss": 0.2186, "step": 1162 }, { "epoch": 0.32, "grad_norm": 3.641740307924866, "learning_rate": 9.850061137379414e-06, "loss": 0.2877, "step": 1163 }, { "epoch": 0.32, "grad_norm": 3.399733383315742, "learning_rate": 9.849702739591665e-06, "loss": 0.293, "step": 1164 }, { "epoch": 0.32, "grad_norm": 3.3365076997579006, "learning_rate": 9.849343920513152e-06, "loss": 0.2486, "step": 1165 }, { "epoch": 0.32, "grad_norm": 3.1083814329872936, "learning_rate": 9.848984680175049e-06, "loss": 0.235, "step": 1166 }, { "epoch": 0.32, "grad_norm": 3.4197356076260603, "learning_rate": 9.848625018608558e-06, "loss": 0.3014, "step": 1167 }, { "epoch": 0.32, "grad_norm": 3.1363940715215177, "learning_rate": 9.848264935844924e-06, "loss": 0.2725, "step": 1168 }, { "epoch": 0.32, "grad_norm": 3.010372502048954, "learning_rate": 9.84790443191543e-06, "loss": 0.2624, "step": 1169 }, { "epoch": 0.32, "grad_norm": 3.035017450320761, "learning_rate": 9.84754350685139e-06, "loss": 0.242, "step": 1170 }, { "epoch": 0.32, "grad_norm": 2.973624503013562, "learning_rate": 9.847182160684158e-06, "loss": 0.2598, "step": 1171 }, { "epoch": 0.32, "grad_norm": 3.2661895519406405, "learning_rate": 9.846820393445125e-06, "loss": 0.2409, "step": 1172 }, { "epoch": 0.32, "grad_norm": 3.378157190162606, "learning_rate": 9.846458205165715e-06, "loss": 0.2622, "step": 1173 }, { "epoch": 0.32, "grad_norm": 3.3415247347195054, "learning_rate": 9.846095595877392e-06, "loss": 0.2468, "step": 1174 }, { "epoch": 0.32, "grad_norm": 3.267267724576772, "learning_rate": 9.845732565611657e-06, "loss": 0.2569, "step": 1175 }, { "epoch": 0.32, "grad_norm": 3.2069423372117574, "learning_rate": 9.845369114400045e-06, "loss": 0.2472, "step": 1176 }, { "epoch": 0.32, "grad_norm": 3.1745211744508137, "learning_rate": 9.84500524227413e-06, "loss": 0.2757, "step": 1177 }, { "epoch": 0.32, "grad_norm": 2.9301821375250134, "learning_rate": 9.844640949265521e-06, "loss": 0.2447, "step": 1178 }, { "epoch": 0.32, "grad_norm": 3.270593293447514, "learning_rate": 9.844276235405861e-06, "loss": 0.2501, "step": 1179 }, { "epoch": 0.32, "grad_norm": 2.8292365517932136, "learning_rate": 9.843911100726838e-06, "loss": 0.2451, "step": 1180 }, { "epoch": 0.32, "grad_norm": 4.24458190791579, "learning_rate": 9.843545545260166e-06, "loss": 0.255, "step": 1181 }, { "epoch": 0.32, "grad_norm": 2.9921071614153956, "learning_rate": 9.843179569037601e-06, "loss": 0.2558, "step": 1182 }, { "epoch": 0.32, "grad_norm": 3.120135504640428, "learning_rate": 9.84281317209094e-06, "loss": 0.2527, "step": 1183 }, { "epoch": 0.32, "grad_norm": 2.9574435847374505, "learning_rate": 9.842446354452007e-06, "loss": 0.2607, "step": 1184 }, { "epoch": 0.32, "grad_norm": 3.00655728891264, "learning_rate": 9.84207911615267e-06, "loss": 0.2348, "step": 1185 }, { "epoch": 0.32, "grad_norm": 2.564259275203512, "learning_rate": 9.841711457224827e-06, "loss": 0.2315, "step": 1186 }, { "epoch": 0.32, "grad_norm": 3.178516775839786, "learning_rate": 9.84134337770042e-06, "loss": 0.2604, "step": 1187 }, { "epoch": 0.32, "grad_norm": 2.9118185271293666, "learning_rate": 9.840974877611423e-06, "loss": 0.2391, "step": 1188 }, { "epoch": 0.32, "grad_norm": 2.999488208569082, "learning_rate": 9.840605956989846e-06, "loss": 0.2473, "step": 1189 }, { "epoch": 0.32, "grad_norm": 2.913629322338597, "learning_rate": 9.840236615867738e-06, "loss": 0.2154, "step": 1190 }, { "epoch": 0.33, "grad_norm": 2.884895425217116, "learning_rate": 9.839866854277182e-06, "loss": 0.2496, "step": 1191 }, { "epoch": 0.33, "grad_norm": 3.0894615152157927, "learning_rate": 9.839496672250301e-06, "loss": 0.2567, "step": 1192 }, { "epoch": 0.33, "grad_norm": 3.068759534760437, "learning_rate": 9.839126069819254e-06, "loss": 0.2507, "step": 1193 }, { "epoch": 0.33, "grad_norm": 3.187900604153824, "learning_rate": 9.838755047016229e-06, "loss": 0.2576, "step": 1194 }, { "epoch": 0.33, "grad_norm": 2.9566174388486464, "learning_rate": 9.838383603873463e-06, "loss": 0.2433, "step": 1195 }, { "epoch": 0.33, "grad_norm": 2.9759192642662096, "learning_rate": 9.838011740423219e-06, "loss": 0.239, "step": 1196 }, { "epoch": 0.33, "grad_norm": 2.972831661343069, "learning_rate": 9.837639456697802e-06, "loss": 0.232, "step": 1197 }, { "epoch": 0.33, "grad_norm": 2.7750806336622, "learning_rate": 9.837266752729552e-06, "loss": 0.2367, "step": 1198 }, { "epoch": 0.33, "grad_norm": 2.827691379197103, "learning_rate": 9.836893628550846e-06, "loss": 0.2408, "step": 1199 }, { "epoch": 0.33, "grad_norm": 2.815658170141337, "learning_rate": 9.836520084194097e-06, "loss": 0.2312, "step": 1200 }, { "epoch": 0.33, "grad_norm": 3.057464773493854, "learning_rate": 9.836146119691752e-06, "loss": 0.2744, "step": 1201 }, { "epoch": 0.33, "grad_norm": 2.8656367413290393, "learning_rate": 9.8357717350763e-06, "loss": 0.2372, "step": 1202 }, { "epoch": 0.33, "grad_norm": 3.2514347865106568, "learning_rate": 9.835396930380264e-06, "loss": 0.2597, "step": 1203 }, { "epoch": 0.33, "grad_norm": 2.955441368222323, "learning_rate": 9.835021705636201e-06, "loss": 0.2633, "step": 1204 }, { "epoch": 0.33, "grad_norm": 3.0960740638895206, "learning_rate": 9.834646060876707e-06, "loss": 0.2399, "step": 1205 }, { "epoch": 0.33, "grad_norm": 3.1750070887087336, "learning_rate": 9.834269996134416e-06, "loss": 0.2956, "step": 1206 }, { "epoch": 0.33, "grad_norm": 3.628820775880156, "learning_rate": 9.833893511441993e-06, "loss": 0.2576, "step": 1207 }, { "epoch": 0.33, "grad_norm": 3.1801194195762683, "learning_rate": 9.833516606832146e-06, "loss": 0.29, "step": 1208 }, { "epoch": 0.33, "grad_norm": 3.053432387455168, "learning_rate": 9.833139282337615e-06, "loss": 0.2735, "step": 1209 }, { "epoch": 0.33, "grad_norm": 3.0218155010339225, "learning_rate": 9.832761537991177e-06, "loss": 0.269, "step": 1210 }, { "epoch": 0.33, "grad_norm": 3.163316769954804, "learning_rate": 9.83238337382565e-06, "loss": 0.2359, "step": 1211 }, { "epoch": 0.33, "grad_norm": 3.341562744210851, "learning_rate": 9.832004789873883e-06, "loss": 0.2668, "step": 1212 }, { "epoch": 0.33, "grad_norm": 3.0137565479094057, "learning_rate": 9.831625786168762e-06, "loss": 0.2183, "step": 1213 }, { "epoch": 0.33, "grad_norm": 3.134347588938876, "learning_rate": 9.83124636274321e-06, "loss": 0.2778, "step": 1214 }, { "epoch": 0.33, "grad_norm": 3.3810173760985975, "learning_rate": 9.830866519630191e-06, "loss": 0.2521, "step": 1215 }, { "epoch": 0.33, "grad_norm": 3.0092605989494436, "learning_rate": 9.8304862568627e-06, "loss": 0.2383, "step": 1216 }, { "epoch": 0.33, "grad_norm": 2.7881943397728652, "learning_rate": 9.83010557447377e-06, "loss": 0.2317, "step": 1217 }, { "epoch": 0.33, "grad_norm": 2.7706960576828523, "learning_rate": 9.829724472496471e-06, "loss": 0.2408, "step": 1218 }, { "epoch": 0.33, "grad_norm": 2.931715255855614, "learning_rate": 9.829342950963908e-06, "loss": 0.2207, "step": 1219 }, { "epoch": 0.33, "grad_norm": 3.164116215641434, "learning_rate": 9.828961009909225e-06, "loss": 0.2732, "step": 1220 }, { "epoch": 0.33, "grad_norm": 2.843276605833962, "learning_rate": 9.8285786493656e-06, "loss": 0.2543, "step": 1221 }, { "epoch": 0.33, "grad_norm": 3.0044667189776066, "learning_rate": 9.82819586936625e-06, "loss": 0.2552, "step": 1222 }, { "epoch": 0.33, "grad_norm": 3.415275420698183, "learning_rate": 9.827812669944423e-06, "loss": 0.2491, "step": 1223 }, { "epoch": 0.33, "grad_norm": 2.848663639697217, "learning_rate": 9.827429051133412e-06, "loss": 0.2186, "step": 1224 }, { "epoch": 0.33, "grad_norm": 3.1255228590451396, "learning_rate": 9.82704501296654e-06, "loss": 0.2589, "step": 1225 }, { "epoch": 0.33, "grad_norm": 3.1109261829573573, "learning_rate": 9.826660555477167e-06, "loss": 0.2406, "step": 1226 }, { "epoch": 0.33, "grad_norm": 3.130609667597135, "learning_rate": 9.82627567869869e-06, "loss": 0.2716, "step": 1227 }, { "epoch": 0.34, "grad_norm": 2.8743900904881006, "learning_rate": 9.825890382664547e-06, "loss": 0.2128, "step": 1228 }, { "epoch": 0.34, "grad_norm": 3.134640376995852, "learning_rate": 9.825504667408205e-06, "loss": 0.2535, "step": 1229 }, { "epoch": 0.34, "grad_norm": 3.0572276927796738, "learning_rate": 9.825118532963172e-06, "loss": 0.2477, "step": 1230 }, { "epoch": 0.34, "grad_norm": 2.9615852301376675, "learning_rate": 9.824731979362991e-06, "loss": 0.2404, "step": 1231 }, { "epoch": 0.34, "grad_norm": 3.195671642915052, "learning_rate": 9.824345006641243e-06, "loss": 0.2843, "step": 1232 }, { "epoch": 0.34, "grad_norm": 2.933194017097645, "learning_rate": 9.82395761483154e-06, "loss": 0.2337, "step": 1233 }, { "epoch": 0.34, "grad_norm": 2.9125056938085208, "learning_rate": 9.823569803967538e-06, "loss": 0.2115, "step": 1234 }, { "epoch": 0.34, "grad_norm": 2.588040278512406, "learning_rate": 9.823181574082927e-06, "loss": 0.2162, "step": 1235 }, { "epoch": 0.34, "grad_norm": 2.9811209941656434, "learning_rate": 9.822792925211429e-06, "loss": 0.2504, "step": 1236 }, { "epoch": 0.34, "grad_norm": 3.0058377718173963, "learning_rate": 9.822403857386808e-06, "loss": 0.2644, "step": 1237 }, { "epoch": 0.34, "grad_norm": 3.1204325390320045, "learning_rate": 9.822014370642861e-06, "loss": 0.2271, "step": 1238 }, { "epoch": 0.34, "grad_norm": 3.0256261042478316, "learning_rate": 9.821624465013422e-06, "loss": 0.2624, "step": 1239 }, { "epoch": 0.34, "grad_norm": 3.0178108233260814, "learning_rate": 9.821234140532363e-06, "loss": 0.2617, "step": 1240 }, { "epoch": 0.34, "grad_norm": 2.8272765368690966, "learning_rate": 9.82084339723359e-06, "loss": 0.259, "step": 1241 }, { "epoch": 0.34, "grad_norm": 2.8937466377850645, "learning_rate": 9.82045223515105e-06, "loss": 0.244, "step": 1242 }, { "epoch": 0.34, "grad_norm": 2.969029717164541, "learning_rate": 9.820060654318718e-06, "loss": 0.2304, "step": 1243 }, { "epoch": 0.34, "grad_norm": 2.9931941186219126, "learning_rate": 9.819668654770613e-06, "loss": 0.2354, "step": 1244 }, { "epoch": 0.34, "grad_norm": 3.389394950149006, "learning_rate": 9.81927623654079e-06, "loss": 0.2686, "step": 1245 }, { "epoch": 0.34, "grad_norm": 2.6485410148762205, "learning_rate": 9.818883399663333e-06, "loss": 0.2106, "step": 1246 }, { "epoch": 0.34, "grad_norm": 2.9073598792710653, "learning_rate": 9.818490144172372e-06, "loss": 0.2443, "step": 1247 }, { "epoch": 0.34, "grad_norm": 3.364372678645247, "learning_rate": 9.818096470102067e-06, "loss": 0.2809, "step": 1248 }, { "epoch": 0.34, "grad_norm": 4.551128418696761, "learning_rate": 9.817702377486616e-06, "loss": 0.2588, "step": 1249 }, { "epoch": 0.34, "grad_norm": 3.1391068765915704, "learning_rate": 9.817307866360255e-06, "loss": 0.2502, "step": 1250 }, { "epoch": 0.34, "grad_norm": 2.9030071165063744, "learning_rate": 9.816912936757252e-06, "loss": 0.2261, "step": 1251 }, { "epoch": 0.34, "grad_norm": 3.0904473213877157, "learning_rate": 9.816517588711918e-06, "loss": 0.2609, "step": 1252 }, { "epoch": 0.34, "grad_norm": 3.141150929340751, "learning_rate": 9.816121822258595e-06, "loss": 0.2798, "step": 1253 }, { "epoch": 0.34, "grad_norm": 2.645682764054394, "learning_rate": 9.815725637431663e-06, "loss": 0.2083, "step": 1254 }, { "epoch": 0.34, "grad_norm": 2.771041108203385, "learning_rate": 9.815329034265537e-06, "loss": 0.2562, "step": 1255 }, { "epoch": 0.34, "grad_norm": 3.0609998682706734, "learning_rate": 9.81493201279467e-06, "loss": 0.2606, "step": 1256 }, { "epoch": 0.34, "grad_norm": 3.1785830974871714, "learning_rate": 9.814534573053554e-06, "loss": 0.2162, "step": 1257 }, { "epoch": 0.34, "grad_norm": 2.781839154828549, "learning_rate": 9.814136715076712e-06, "loss": 0.2392, "step": 1258 }, { "epoch": 0.34, "grad_norm": 2.732361163820835, "learning_rate": 9.813738438898705e-06, "loss": 0.217, "step": 1259 }, { "epoch": 0.34, "grad_norm": 3.368933313189031, "learning_rate": 9.813339744554134e-06, "loss": 0.288, "step": 1260 }, { "epoch": 0.34, "grad_norm": 2.622675303832583, "learning_rate": 9.812940632077629e-06, "loss": 0.2519, "step": 1261 }, { "epoch": 0.34, "grad_norm": 3.4208772684634035, "learning_rate": 9.812541101503863e-06, "loss": 0.2604, "step": 1262 }, { "epoch": 0.34, "grad_norm": 4.113738877387231, "learning_rate": 9.812141152867545e-06, "loss": 0.2442, "step": 1263 }, { "epoch": 0.35, "grad_norm": 3.139036950616636, "learning_rate": 9.811740786203414e-06, "loss": 0.2316, "step": 1264 }, { "epoch": 0.35, "grad_norm": 4.419167717116625, "learning_rate": 9.811340001546252e-06, "loss": 0.2289, "step": 1265 }, { "epoch": 0.35, "grad_norm": 3.2143647000980833, "learning_rate": 9.810938798930876e-06, "loss": 0.2416, "step": 1266 }, { "epoch": 0.35, "grad_norm": 2.9808804309636843, "learning_rate": 9.810537178392137e-06, "loss": 0.2415, "step": 1267 }, { "epoch": 0.35, "grad_norm": 2.9939559005988365, "learning_rate": 9.810135139964922e-06, "loss": 0.2214, "step": 1268 }, { "epoch": 0.35, "grad_norm": 3.7757786289149005, "learning_rate": 9.809732683684159e-06, "loss": 0.2633, "step": 1269 }, { "epoch": 0.35, "grad_norm": 3.8083520955299472, "learning_rate": 9.809329809584808e-06, "loss": 0.2725, "step": 1270 }, { "epoch": 0.35, "grad_norm": 2.8358255488788093, "learning_rate": 9.808926517701865e-06, "loss": 0.213, "step": 1271 }, { "epoch": 0.35, "grad_norm": 2.902968770899238, "learning_rate": 9.808522808070365e-06, "loss": 0.2064, "step": 1272 }, { "epoch": 0.35, "grad_norm": 2.972331018988225, "learning_rate": 9.808118680725376e-06, "loss": 0.2501, "step": 1273 }, { "epoch": 0.35, "grad_norm": 2.48357951576396, "learning_rate": 9.807714135702008e-06, "loss": 0.2162, "step": 1274 }, { "epoch": 0.35, "grad_norm": 2.872521740272126, "learning_rate": 9.8073091730354e-06, "loss": 0.2495, "step": 1275 }, { "epoch": 0.35, "grad_norm": 3.7941610101784353, "learning_rate": 9.806903792760733e-06, "loss": 0.2341, "step": 1276 }, { "epoch": 0.35, "grad_norm": 2.920416484684805, "learning_rate": 9.806497994913223e-06, "loss": 0.2274, "step": 1277 }, { "epoch": 0.35, "grad_norm": 3.036458782351314, "learning_rate": 9.806091779528119e-06, "loss": 0.2396, "step": 1278 }, { "epoch": 0.35, "grad_norm": 3.0625407075135818, "learning_rate": 9.80568514664071e-06, "loss": 0.2218, "step": 1279 }, { "epoch": 0.35, "grad_norm": 3.096712644925063, "learning_rate": 9.805278096286318e-06, "loss": 0.2557, "step": 1280 }, { "epoch": 0.35, "grad_norm": 2.7830752639758587, "learning_rate": 9.804870628500306e-06, "loss": 0.201, "step": 1281 }, { "epoch": 0.35, "grad_norm": 2.7330453381241333, "learning_rate": 9.80446274331807e-06, "loss": 0.2338, "step": 1282 }, { "epoch": 0.35, "grad_norm": 3.309180277266247, "learning_rate": 9.80405444077504e-06, "loss": 0.292, "step": 1283 }, { "epoch": 0.35, "grad_norm": 2.935178279963719, "learning_rate": 9.803645720906689e-06, "loss": 0.2319, "step": 1284 }, { "epoch": 0.35, "grad_norm": 2.942633588779663, "learning_rate": 9.80323658374852e-06, "loss": 0.2355, "step": 1285 }, { "epoch": 0.35, "grad_norm": 3.7224101923256874, "learning_rate": 9.802827029336076e-06, "loss": 0.2756, "step": 1286 }, { "epoch": 0.35, "grad_norm": 3.3284034316773035, "learning_rate": 9.80241705770493e-06, "loss": 0.2635, "step": 1287 }, { "epoch": 0.35, "grad_norm": 3.199097902914095, "learning_rate": 9.802006668890702e-06, "loss": 0.2569, "step": 1288 }, { "epoch": 0.35, "grad_norm": 2.969066947405044, "learning_rate": 9.80159586292904e-06, "loss": 0.2515, "step": 1289 }, { "epoch": 0.35, "grad_norm": 3.995045862758655, "learning_rate": 9.80118463985563e-06, "loss": 0.266, "step": 1290 }, { "epoch": 0.35, "grad_norm": 3.146356230662008, "learning_rate": 9.800772999706194e-06, "loss": 0.2127, "step": 1291 }, { "epoch": 0.35, "grad_norm": 2.84808712157093, "learning_rate": 9.800360942516492e-06, "loss": 0.2042, "step": 1292 }, { "epoch": 0.35, "grad_norm": 3.1417815974339587, "learning_rate": 9.79994846832232e-06, "loss": 0.2511, "step": 1293 }, { "epoch": 0.35, "grad_norm": 2.846892211007102, "learning_rate": 9.799535577159508e-06, "loss": 0.2333, "step": 1294 }, { "epoch": 0.35, "grad_norm": 2.9185759963950715, "learning_rate": 9.799122269063923e-06, "loss": 0.2611, "step": 1295 }, { "epoch": 0.35, "grad_norm": 3.3653031968848564, "learning_rate": 9.798708544071471e-06, "loss": 0.2626, "step": 1296 }, { "epoch": 0.35, "grad_norm": 2.8032348067385886, "learning_rate": 9.798294402218092e-06, "loss": 0.208, "step": 1297 }, { "epoch": 0.35, "grad_norm": 3.381782583910323, "learning_rate": 9.797879843539759e-06, "loss": 0.268, "step": 1298 }, { "epoch": 0.35, "grad_norm": 2.956812105563334, "learning_rate": 9.797464868072489e-06, "loss": 0.2545, "step": 1299 }, { "epoch": 0.35, "grad_norm": 3.291638112071125, "learning_rate": 9.797049475852326e-06, "loss": 0.2421, "step": 1300 }, { "epoch": 0.36, "grad_norm": 2.9288565883410804, "learning_rate": 9.79663366691536e-06, "loss": 0.2177, "step": 1301 }, { "epoch": 0.36, "grad_norm": 3.0594606247297653, "learning_rate": 9.796217441297704e-06, "loss": 0.229, "step": 1302 }, { "epoch": 0.36, "grad_norm": 3.0042457235324296, "learning_rate": 9.795800799035524e-06, "loss": 0.2296, "step": 1303 }, { "epoch": 0.36, "grad_norm": 2.8891347001244454, "learning_rate": 9.79538374016501e-06, "loss": 0.2425, "step": 1304 }, { "epoch": 0.36, "grad_norm": 3.2146910253200183, "learning_rate": 9.794966264722393e-06, "loss": 0.2712, "step": 1305 }, { "epoch": 0.36, "grad_norm": 2.854955435082528, "learning_rate": 9.794548372743933e-06, "loss": 0.2385, "step": 1306 }, { "epoch": 0.36, "grad_norm": 3.713036302965482, "learning_rate": 9.79413006426594e-06, "loss": 0.3128, "step": 1307 }, { "epoch": 0.36, "grad_norm": 3.010742657640395, "learning_rate": 9.793711339324747e-06, "loss": 0.2378, "step": 1308 }, { "epoch": 0.36, "grad_norm": 2.7513893304966746, "learning_rate": 9.793292197956732e-06, "loss": 0.2316, "step": 1309 }, { "epoch": 0.36, "grad_norm": 2.632643880424819, "learning_rate": 9.792872640198304e-06, "loss": 0.2244, "step": 1310 }, { "epoch": 0.36, "grad_norm": 3.121034875998835, "learning_rate": 9.792452666085907e-06, "loss": 0.2065, "step": 1311 }, { "epoch": 0.36, "grad_norm": 2.916013310519197, "learning_rate": 9.792032275656027e-06, "loss": 0.2388, "step": 1312 }, { "epoch": 0.36, "grad_norm": 2.6940096650580627, "learning_rate": 9.791611468945183e-06, "loss": 0.2112, "step": 1313 }, { "epoch": 0.36, "grad_norm": 3.2924772744135278, "learning_rate": 9.791190245989928e-06, "loss": 0.2328, "step": 1314 }, { "epoch": 0.36, "grad_norm": 3.312972018158128, "learning_rate": 9.790768606826857e-06, "loss": 0.2883, "step": 1315 }, { "epoch": 0.36, "grad_norm": 2.8283500515112956, "learning_rate": 9.790346551492594e-06, "loss": 0.2352, "step": 1316 }, { "epoch": 0.36, "grad_norm": 3.3789689431948906, "learning_rate": 9.789924080023805e-06, "loss": 0.2539, "step": 1317 }, { "epoch": 0.36, "grad_norm": 3.025163570085979, "learning_rate": 9.789501192457188e-06, "loss": 0.2665, "step": 1318 }, { "epoch": 0.36, "grad_norm": 2.7201646541561666, "learning_rate": 9.789077888829481e-06, "loss": 0.2263, "step": 1319 }, { "epoch": 0.36, "grad_norm": 3.105134247206127, "learning_rate": 9.788654169177454e-06, "loss": 0.239, "step": 1320 }, { "epoch": 0.36, "grad_norm": 2.9937715566232086, "learning_rate": 9.788230033537918e-06, "loss": 0.2326, "step": 1321 }, { "epoch": 0.36, "grad_norm": 2.8930068684455095, "learning_rate": 9.787805481947715e-06, "loss": 0.2618, "step": 1322 }, { "epoch": 0.36, "grad_norm": 2.8532096304885988, "learning_rate": 9.787380514443727e-06, "loss": 0.2379, "step": 1323 }, { "epoch": 0.36, "grad_norm": 3.306251307084509, "learning_rate": 9.78695513106287e-06, "loss": 0.2648, "step": 1324 }, { "epoch": 0.36, "grad_norm": 2.88144555529565, "learning_rate": 9.786529331842096e-06, "loss": 0.2249, "step": 1325 }, { "epoch": 0.36, "grad_norm": 2.8465201053620137, "learning_rate": 9.786103116818394e-06, "loss": 0.2161, "step": 1326 }, { "epoch": 0.36, "grad_norm": 2.6155688024809303, "learning_rate": 9.78567648602879e-06, "loss": 0.2117, "step": 1327 }, { "epoch": 0.36, "grad_norm": 3.2467769788155234, "learning_rate": 9.785249439510348e-06, "loss": 0.2793, "step": 1328 }, { "epoch": 0.36, "grad_norm": 3.330731261149586, "learning_rate": 9.784821977300159e-06, "loss": 0.2838, "step": 1329 }, { "epoch": 0.36, "grad_norm": 2.9462581289474934, "learning_rate": 9.78439409943536e-06, "loss": 0.2699, "step": 1330 }, { "epoch": 0.36, "grad_norm": 2.501323978467036, "learning_rate": 9.78396580595312e-06, "loss": 0.1978, "step": 1331 }, { "epoch": 0.36, "grad_norm": 3.880850440291398, "learning_rate": 9.783537096890647e-06, "loss": 0.2732, "step": 1332 }, { "epoch": 0.36, "grad_norm": 2.846927591391255, "learning_rate": 9.783107972285177e-06, "loss": 0.2391, "step": 1333 }, { "epoch": 0.36, "grad_norm": 3.051728788517442, "learning_rate": 9.782678432173992e-06, "loss": 0.2244, "step": 1334 }, { "epoch": 0.36, "grad_norm": 2.933609749329675, "learning_rate": 9.782248476594408e-06, "loss": 0.2356, "step": 1335 }, { "epoch": 0.36, "grad_norm": 2.773405606029093, "learning_rate": 9.781818105583771e-06, "loss": 0.2244, "step": 1336 }, { "epoch": 0.37, "grad_norm": 2.846571843676018, "learning_rate": 9.781387319179465e-06, "loss": 0.232, "step": 1337 }, { "epoch": 0.37, "grad_norm": 3.011532741673929, "learning_rate": 9.780956117418919e-06, "loss": 0.2533, "step": 1338 }, { "epoch": 0.37, "grad_norm": 2.760565626444453, "learning_rate": 9.780524500339585e-06, "loss": 0.2161, "step": 1339 }, { "epoch": 0.37, "grad_norm": 2.92150953272804, "learning_rate": 9.78009246797896e-06, "loss": 0.2704, "step": 1340 }, { "epoch": 0.37, "grad_norm": 3.0816136912354355, "learning_rate": 9.779660020374577e-06, "loss": 0.2523, "step": 1341 }, { "epoch": 0.37, "grad_norm": 2.9256642668861224, "learning_rate": 9.779227157563998e-06, "loss": 0.2247, "step": 1342 }, { "epoch": 0.37, "grad_norm": 2.7818828946933163, "learning_rate": 9.778793879584828e-06, "loss": 0.2412, "step": 1343 }, { "epoch": 0.37, "grad_norm": 2.6182256789978062, "learning_rate": 9.778360186474703e-06, "loss": 0.2242, "step": 1344 }, { "epoch": 0.37, "grad_norm": 2.780293571580932, "learning_rate": 9.7779260782713e-06, "loss": 0.2386, "step": 1345 }, { "epoch": 0.37, "grad_norm": 2.7916525125110274, "learning_rate": 9.777491555012331e-06, "loss": 0.2148, "step": 1346 }, { "epoch": 0.37, "grad_norm": 2.8414558705044697, "learning_rate": 9.777056616735539e-06, "loss": 0.2277, "step": 1347 }, { "epoch": 0.37, "grad_norm": 2.7084149370794495, "learning_rate": 9.77662126347871e-06, "loss": 0.2204, "step": 1348 }, { "epoch": 0.37, "grad_norm": 3.1534858821209246, "learning_rate": 9.776185495279662e-06, "loss": 0.2169, "step": 1349 }, { "epoch": 0.37, "grad_norm": 2.887329227267067, "learning_rate": 9.775749312176249e-06, "loss": 0.2392, "step": 1350 }, { "epoch": 0.37, "grad_norm": 3.0635723562757406, "learning_rate": 9.77531271420636e-06, "loss": 0.2363, "step": 1351 }, { "epoch": 0.37, "grad_norm": 2.723383931741273, "learning_rate": 9.774875701407928e-06, "loss": 0.2119, "step": 1352 }, { "epoch": 0.37, "grad_norm": 3.101023708212512, "learning_rate": 9.77443827381891e-06, "loss": 0.2357, "step": 1353 }, { "epoch": 0.37, "grad_norm": 3.1334044743601526, "learning_rate": 9.774000431477311e-06, "loss": 0.2711, "step": 1354 }, { "epoch": 0.37, "grad_norm": 2.963598890267245, "learning_rate": 9.77356217442116e-06, "loss": 0.2795, "step": 1355 }, { "epoch": 0.37, "grad_norm": 4.4607461488362254, "learning_rate": 9.773123502688532e-06, "loss": 0.2721, "step": 1356 }, { "epoch": 0.37, "grad_norm": 3.1199559666763776, "learning_rate": 9.772684416317534e-06, "loss": 0.2609, "step": 1357 }, { "epoch": 0.37, "grad_norm": 2.881933361469555, "learning_rate": 9.772244915346307e-06, "loss": 0.2529, "step": 1358 }, { "epoch": 0.37, "grad_norm": 2.884999408927426, "learning_rate": 9.771804999813033e-06, "loss": 0.2291, "step": 1359 }, { "epoch": 0.37, "grad_norm": 3.1996217454945173, "learning_rate": 9.771364669755923e-06, "loss": 0.2423, "step": 1360 }, { "epoch": 0.37, "grad_norm": 2.9066144554302924, "learning_rate": 9.770923925213232e-06, "loss": 0.2452, "step": 1361 }, { "epoch": 0.37, "grad_norm": 2.831117241446732, "learning_rate": 9.770482766223246e-06, "loss": 0.2457, "step": 1362 }, { "epoch": 0.37, "grad_norm": 2.9745718789362137, "learning_rate": 9.77004119282429e-06, "loss": 0.239, "step": 1363 }, { "epoch": 0.37, "grad_norm": 2.9381154286601836, "learning_rate": 9.76959920505472e-06, "loss": 0.2322, "step": 1364 }, { "epoch": 0.37, "grad_norm": 3.207748380942559, "learning_rate": 9.769156802952932e-06, "loss": 0.251, "step": 1365 }, { "epoch": 0.37, "grad_norm": 2.82081950408619, "learning_rate": 9.768713986557359e-06, "loss": 0.2391, "step": 1366 }, { "epoch": 0.37, "grad_norm": 2.809456415586235, "learning_rate": 9.768270755906467e-06, "loss": 0.2528, "step": 1367 }, { "epoch": 0.37, "grad_norm": 2.3635368577684384, "learning_rate": 9.767827111038757e-06, "loss": 0.2137, "step": 1368 }, { "epoch": 0.37, "grad_norm": 2.745610774592366, "learning_rate": 9.767383051992774e-06, "loss": 0.2456, "step": 1369 }, { "epoch": 0.37, "grad_norm": 2.5318381433366692, "learning_rate": 9.766938578807088e-06, "loss": 0.2175, "step": 1370 }, { "epoch": 0.37, "grad_norm": 2.6889640603088245, "learning_rate": 9.766493691520312e-06, "loss": 0.1818, "step": 1371 }, { "epoch": 0.37, "grad_norm": 2.5748940285815154, "learning_rate": 9.766048390171091e-06, "loss": 0.226, "step": 1372 }, { "epoch": 0.37, "grad_norm": 2.658609980502939, "learning_rate": 9.765602674798112e-06, "loss": 0.2314, "step": 1373 }, { "epoch": 0.38, "grad_norm": 2.7808521318830186, "learning_rate": 9.76515654544009e-06, "loss": 0.2225, "step": 1374 }, { "epoch": 0.38, "grad_norm": 3.1063757115858017, "learning_rate": 9.764710002135784e-06, "loss": 0.242, "step": 1375 }, { "epoch": 0.38, "grad_norm": 2.979320561193384, "learning_rate": 9.764263044923983e-06, "loss": 0.2046, "step": 1376 }, { "epoch": 0.38, "grad_norm": 2.726814486021948, "learning_rate": 9.763815673843511e-06, "loss": 0.2154, "step": 1377 }, { "epoch": 0.38, "grad_norm": 3.08736527989591, "learning_rate": 9.763367888933235e-06, "loss": 0.2973, "step": 1378 }, { "epoch": 0.38, "grad_norm": 3.378369187913458, "learning_rate": 9.762919690232053e-06, "loss": 0.2429, "step": 1379 }, { "epoch": 0.38, "grad_norm": 2.6106792037935933, "learning_rate": 9.762471077778898e-06, "loss": 0.2022, "step": 1380 }, { "epoch": 0.38, "grad_norm": 4.028391364636959, "learning_rate": 9.762022051612742e-06, "loss": 0.2316, "step": 1381 }, { "epoch": 0.38, "grad_norm": 2.64027256655154, "learning_rate": 9.761572611772592e-06, "loss": 0.1984, "step": 1382 }, { "epoch": 0.38, "grad_norm": 3.3860530705961587, "learning_rate": 9.76112275829749e-06, "loss": 0.2753, "step": 1383 }, { "epoch": 0.38, "grad_norm": 2.3926531943143763, "learning_rate": 9.760672491226515e-06, "loss": 0.1956, "step": 1384 }, { "epoch": 0.38, "grad_norm": 2.6780198406056708, "learning_rate": 9.76022181059878e-06, "loss": 0.2218, "step": 1385 }, { "epoch": 0.38, "grad_norm": 3.014662980555568, "learning_rate": 9.759770716453436e-06, "loss": 0.2635, "step": 1386 }, { "epoch": 0.38, "grad_norm": 2.8401462366944754, "learning_rate": 9.759319208829671e-06, "loss": 0.2184, "step": 1387 }, { "epoch": 0.38, "grad_norm": 2.60835863599508, "learning_rate": 9.758867287766705e-06, "loss": 0.2568, "step": 1388 }, { "epoch": 0.38, "grad_norm": 3.441044084131762, "learning_rate": 9.758414953303796e-06, "loss": 0.2675, "step": 1389 }, { "epoch": 0.38, "grad_norm": 2.880216438282025, "learning_rate": 9.75796220548024e-06, "loss": 0.2523, "step": 1390 }, { "epoch": 0.38, "grad_norm": 2.762354414228669, "learning_rate": 9.757509044335367e-06, "loss": 0.2574, "step": 1391 }, { "epoch": 0.38, "grad_norm": 3.10827643677901, "learning_rate": 9.757055469908541e-06, "loss": 0.2631, "step": 1392 }, { "epoch": 0.38, "grad_norm": 3.45109620298898, "learning_rate": 9.756601482239162e-06, "loss": 0.2915, "step": 1393 }, { "epoch": 0.38, "grad_norm": 2.814712553859058, "learning_rate": 9.756147081366673e-06, "loss": 0.251, "step": 1394 }, { "epoch": 0.38, "grad_norm": 2.9052448870008205, "learning_rate": 9.755692267330542e-06, "loss": 0.237, "step": 1395 }, { "epoch": 0.38, "grad_norm": 3.041018731645638, "learning_rate": 9.755237040170284e-06, "loss": 0.2328, "step": 1396 }, { "epoch": 0.38, "grad_norm": 2.988361672076705, "learning_rate": 9.754781399925439e-06, "loss": 0.2374, "step": 1397 }, { "epoch": 0.38, "grad_norm": 2.6507589502827287, "learning_rate": 9.754325346635592e-06, "loss": 0.2248, "step": 1398 }, { "epoch": 0.38, "grad_norm": 2.881351900232617, "learning_rate": 9.753868880340359e-06, "loss": 0.2717, "step": 1399 }, { "epoch": 0.38, "grad_norm": 3.1008929889260797, "learning_rate": 9.75341200107939e-06, "loss": 0.2652, "step": 1400 }, { "epoch": 0.38, "grad_norm": 2.627201150191938, "learning_rate": 9.752954708892379e-06, "loss": 0.2109, "step": 1401 }, { "epoch": 0.38, "grad_norm": 2.789514054087141, "learning_rate": 9.752497003819047e-06, "loss": 0.221, "step": 1402 }, { "epoch": 0.38, "grad_norm": 2.7638005097773566, "learning_rate": 9.752038885899154e-06, "loss": 0.2154, "step": 1403 }, { "epoch": 0.38, "grad_norm": 2.682996603419557, "learning_rate": 9.7515803551725e-06, "loss": 0.2129, "step": 1404 }, { "epoch": 0.38, "grad_norm": 2.9456725535516584, "learning_rate": 9.751121411678915e-06, "loss": 0.2246, "step": 1405 }, { "epoch": 0.38, "grad_norm": 2.7442855938194004, "learning_rate": 9.750662055458268e-06, "loss": 0.2121, "step": 1406 }, { "epoch": 0.38, "grad_norm": 3.0050591184089117, "learning_rate": 9.75020228655046e-06, "loss": 0.241, "step": 1407 }, { "epoch": 0.38, "grad_norm": 3.2350317194153346, "learning_rate": 9.749742104995437e-06, "loss": 0.2804, "step": 1408 }, { "epoch": 0.38, "grad_norm": 2.970326638213501, "learning_rate": 9.74928151083317e-06, "loss": 0.2476, "step": 1409 }, { "epoch": 0.38, "grad_norm": 3.1555676716786967, "learning_rate": 9.748820504103671e-06, "loss": 0.241, "step": 1410 }, { "epoch": 0.39, "grad_norm": 3.1348716369369978, "learning_rate": 9.748359084846988e-06, "loss": 0.2449, "step": 1411 }, { "epoch": 0.39, "grad_norm": 2.8001187749348473, "learning_rate": 9.747897253103203e-06, "loss": 0.238, "step": 1412 }, { "epoch": 0.39, "grad_norm": 2.7498967043995846, "learning_rate": 9.747435008912438e-06, "loss": 0.246, "step": 1413 }, { "epoch": 0.39, "grad_norm": 2.9037105384409476, "learning_rate": 9.746972352314845e-06, "loss": 0.1931, "step": 1414 }, { "epoch": 0.39, "grad_norm": 3.0233533254225695, "learning_rate": 9.746509283350615e-06, "loss": 0.2494, "step": 1415 }, { "epoch": 0.39, "grad_norm": 2.709996444827517, "learning_rate": 9.746045802059978e-06, "loss": 0.2177, "step": 1416 }, { "epoch": 0.39, "grad_norm": 2.7811529903648444, "learning_rate": 9.745581908483192e-06, "loss": 0.2255, "step": 1417 }, { "epoch": 0.39, "grad_norm": 3.0215441716866382, "learning_rate": 9.745117602660556e-06, "loss": 0.237, "step": 1418 }, { "epoch": 0.39, "grad_norm": 2.796389923691804, "learning_rate": 9.744652884632406e-06, "loss": 0.2196, "step": 1419 }, { "epoch": 0.39, "grad_norm": 2.941517062979519, "learning_rate": 9.74418775443911e-06, "loss": 0.2382, "step": 1420 }, { "epoch": 0.39, "grad_norm": 2.8072558205273435, "learning_rate": 9.743722212121075e-06, "loss": 0.25, "step": 1421 }, { "epoch": 0.39, "grad_norm": 2.900209166241114, "learning_rate": 9.743256257718741e-06, "loss": 0.2139, "step": 1422 }, { "epoch": 0.39, "grad_norm": 2.912636878828581, "learning_rate": 9.742789891272586e-06, "loss": 0.2279, "step": 1423 }, { "epoch": 0.39, "grad_norm": 2.8156612477239147, "learning_rate": 9.742323112823123e-06, "loss": 0.2057, "step": 1424 }, { "epoch": 0.39, "grad_norm": 2.7964001395071136, "learning_rate": 9.741855922410898e-06, "loss": 0.2417, "step": 1425 }, { "epoch": 0.39, "grad_norm": 2.9162231197120745, "learning_rate": 9.741388320076502e-06, "loss": 0.2419, "step": 1426 }, { "epoch": 0.39, "grad_norm": 2.7809086022401193, "learning_rate": 9.740920305860548e-06, "loss": 0.2265, "step": 1427 }, { "epoch": 0.39, "grad_norm": 2.983886010953564, "learning_rate": 9.740451879803697e-06, "loss": 0.224, "step": 1428 }, { "epoch": 0.39, "grad_norm": 3.0570074553240496, "learning_rate": 9.739983041946639e-06, "loss": 0.2387, "step": 1429 }, { "epoch": 0.39, "grad_norm": 2.6133874096467746, "learning_rate": 9.7395137923301e-06, "loss": 0.1918, "step": 1430 }, { "epoch": 0.39, "grad_norm": 2.764566808220084, "learning_rate": 9.739044130994848e-06, "loss": 0.2353, "step": 1431 }, { "epoch": 0.39, "grad_norm": 2.6752715070348048, "learning_rate": 9.73857405798168e-06, "loss": 0.2515, "step": 1432 }, { "epoch": 0.39, "grad_norm": 2.5198857225780302, "learning_rate": 9.738103573331427e-06, "loss": 0.1994, "step": 1433 }, { "epoch": 0.39, "grad_norm": 3.0083749373139623, "learning_rate": 9.737632677084967e-06, "loss": 0.2537, "step": 1434 }, { "epoch": 0.39, "grad_norm": 2.8177570725757586, "learning_rate": 9.737161369283201e-06, "loss": 0.2399, "step": 1435 }, { "epoch": 0.39, "grad_norm": 2.8526563281320363, "learning_rate": 9.736689649967074e-06, "loss": 0.2477, "step": 1436 }, { "epoch": 0.39, "grad_norm": 3.1594934516407074, "learning_rate": 9.736217519177562e-06, "loss": 0.2089, "step": 1437 }, { "epoch": 0.39, "grad_norm": 2.9541129715645886, "learning_rate": 9.735744976955681e-06, "loss": 0.237, "step": 1438 }, { "epoch": 0.39, "grad_norm": 2.7483571705215533, "learning_rate": 9.735272023342476e-06, "loss": 0.247, "step": 1439 }, { "epoch": 0.39, "grad_norm": 2.6214678485011973, "learning_rate": 9.734798658379038e-06, "loss": 0.1828, "step": 1440 }, { "epoch": 0.39, "grad_norm": 2.874750182668853, "learning_rate": 9.734324882106486e-06, "loss": 0.247, "step": 1441 }, { "epoch": 0.39, "grad_norm": 2.602984845339882, "learning_rate": 9.733850694565975e-06, "loss": 0.2388, "step": 1442 }, { "epoch": 0.39, "grad_norm": 2.8550399989129245, "learning_rate": 9.733376095798698e-06, "loss": 0.2194, "step": 1443 }, { "epoch": 0.39, "grad_norm": 2.874783969070111, "learning_rate": 9.732901085845884e-06, "loss": 0.2125, "step": 1444 }, { "epoch": 0.39, "grad_norm": 2.4286741135106733, "learning_rate": 9.732425664748794e-06, "loss": 0.2127, "step": 1445 }, { "epoch": 0.39, "grad_norm": 2.9287319477749034, "learning_rate": 9.731949832548733e-06, "loss": 0.25, "step": 1446 }, { "epoch": 0.4, "grad_norm": 3.056696971915079, "learning_rate": 9.731473589287031e-06, "loss": 0.2447, "step": 1447 }, { "epoch": 0.4, "grad_norm": 2.625770808300889, "learning_rate": 9.730996935005062e-06, "loss": 0.2301, "step": 1448 }, { "epoch": 0.4, "grad_norm": 2.713978943811164, "learning_rate": 9.730519869744231e-06, "loss": 0.1856, "step": 1449 }, { "epoch": 0.4, "grad_norm": 2.911788171947011, "learning_rate": 9.730042393545981e-06, "loss": 0.2099, "step": 1450 }, { "epoch": 0.4, "grad_norm": 3.685149597225699, "learning_rate": 9.729564506451791e-06, "loss": 0.2496, "step": 1451 }, { "epoch": 0.4, "grad_norm": 3.1868346876719644, "learning_rate": 9.729086208503174e-06, "loss": 0.2398, "step": 1452 }, { "epoch": 0.4, "grad_norm": 2.9297875193130842, "learning_rate": 9.72860749974168e-06, "loss": 0.2114, "step": 1453 }, { "epoch": 0.4, "grad_norm": 3.027276424904841, "learning_rate": 9.728128380208893e-06, "loss": 0.253, "step": 1454 }, { "epoch": 0.4, "grad_norm": 2.775986367481444, "learning_rate": 9.727648849946432e-06, "loss": 0.2359, "step": 1455 }, { "epoch": 0.4, "grad_norm": 3.3398402406789036, "learning_rate": 9.727168908995958e-06, "loss": 0.2268, "step": 1456 }, { "epoch": 0.4, "grad_norm": 3.1048207044183993, "learning_rate": 9.72668855739916e-06, "loss": 0.2414, "step": 1457 }, { "epoch": 0.4, "grad_norm": 2.9130027237547287, "learning_rate": 9.726207795197768e-06, "loss": 0.244, "step": 1458 }, { "epoch": 0.4, "grad_norm": 2.737956355562966, "learning_rate": 9.725726622433544e-06, "loss": 0.2005, "step": 1459 }, { "epoch": 0.4, "grad_norm": 2.7139957498767293, "learning_rate": 9.725245039148287e-06, "loss": 0.218, "step": 1460 }, { "epoch": 0.4, "grad_norm": 2.8155470025174916, "learning_rate": 9.724763045383833e-06, "loss": 0.2219, "step": 1461 }, { "epoch": 0.4, "grad_norm": 2.80076173339565, "learning_rate": 9.724280641182052e-06, "loss": 0.225, "step": 1462 }, { "epoch": 0.4, "grad_norm": 2.9626463090711974, "learning_rate": 9.723797826584849e-06, "loss": 0.2399, "step": 1463 }, { "epoch": 0.4, "grad_norm": 3.1460763243447225, "learning_rate": 9.723314601634169e-06, "loss": 0.2455, "step": 1464 }, { "epoch": 0.4, "grad_norm": 2.704317440787788, "learning_rate": 9.722830966371985e-06, "loss": 0.2313, "step": 1465 }, { "epoch": 0.4, "grad_norm": 2.8420013040949295, "learning_rate": 9.722346920840313e-06, "loss": 0.2502, "step": 1466 }, { "epoch": 0.4, "grad_norm": 3.991844196300371, "learning_rate": 9.721862465081202e-06, "loss": 0.2233, "step": 1467 }, { "epoch": 0.4, "grad_norm": 2.8066055210309897, "learning_rate": 9.721377599136736e-06, "loss": 0.2261, "step": 1468 }, { "epoch": 0.4, "grad_norm": 2.912754556760992, "learning_rate": 9.720892323049034e-06, "loss": 0.232, "step": 1469 }, { "epoch": 0.4, "grad_norm": 2.5641644611215395, "learning_rate": 9.720406636860252e-06, "loss": 0.2125, "step": 1470 }, { "epoch": 0.4, "grad_norm": 2.574205658915224, "learning_rate": 9.719920540612581e-06, "loss": 0.235, "step": 1471 }, { "epoch": 0.4, "grad_norm": 3.0153345293696905, "learning_rate": 9.71943403434825e-06, "loss": 0.2584, "step": 1472 }, { "epoch": 0.4, "grad_norm": 2.7547468869911698, "learning_rate": 9.71894711810952e-06, "loss": 0.2331, "step": 1473 }, { "epoch": 0.4, "grad_norm": 2.7033246964211477, "learning_rate": 9.718459791938688e-06, "loss": 0.2171, "step": 1474 }, { "epoch": 0.4, "grad_norm": 2.631225181848083, "learning_rate": 9.717972055878088e-06, "loss": 0.2364, "step": 1475 }, { "epoch": 0.4, "grad_norm": 2.764094585563155, "learning_rate": 9.717483909970094e-06, "loss": 0.2474, "step": 1476 }, { "epoch": 0.4, "grad_norm": 3.075218439107802, "learning_rate": 9.716995354257103e-06, "loss": 0.2607, "step": 1477 }, { "epoch": 0.4, "grad_norm": 3.3546477061169537, "learning_rate": 9.71650638878156e-06, "loss": 0.25, "step": 1478 }, { "epoch": 0.4, "grad_norm": 2.633538440229838, "learning_rate": 9.716017013585942e-06, "loss": 0.243, "step": 1479 }, { "epoch": 0.4, "grad_norm": 2.871066080941459, "learning_rate": 9.71552722871276e-06, "loss": 0.2226, "step": 1480 }, { "epoch": 0.4, "grad_norm": 3.141714034012026, "learning_rate": 9.71503703420456e-06, "loss": 0.2607, "step": 1481 }, { "epoch": 0.4, "grad_norm": 2.6676819877574576, "learning_rate": 9.714546430103924e-06, "loss": 0.2089, "step": 1482 }, { "epoch": 0.4, "grad_norm": 3.9014722408892615, "learning_rate": 9.714055416453473e-06, "loss": 0.257, "step": 1483 }, { "epoch": 0.41, "grad_norm": 2.9449452523977913, "learning_rate": 9.713563993295862e-06, "loss": 0.2273, "step": 1484 }, { "epoch": 0.41, "grad_norm": 2.823252338309778, "learning_rate": 9.713072160673778e-06, "loss": 0.2413, "step": 1485 }, { "epoch": 0.41, "grad_norm": 2.4554738033481835, "learning_rate": 9.712579918629947e-06, "loss": 0.2045, "step": 1486 }, { "epoch": 0.41, "grad_norm": 2.9483760562896486, "learning_rate": 9.71208726720713e-06, "loss": 0.2386, "step": 1487 }, { "epoch": 0.41, "grad_norm": 2.9064400159585806, "learning_rate": 9.711594206448123e-06, "loss": 0.2531, "step": 1488 }, { "epoch": 0.41, "grad_norm": 2.458087638656289, "learning_rate": 9.711100736395758e-06, "loss": 0.2024, "step": 1489 }, { "epoch": 0.41, "grad_norm": 3.3667108872172418, "learning_rate": 9.710606857092903e-06, "loss": 0.2365, "step": 1490 }, { "epoch": 0.41, "grad_norm": 2.5168877621793206, "learning_rate": 9.71011256858246e-06, "loss": 0.2007, "step": 1491 }, { "epoch": 0.41, "grad_norm": 2.7584043646100014, "learning_rate": 9.709617870907368e-06, "loss": 0.2229, "step": 1492 }, { "epoch": 0.41, "grad_norm": 2.6134092135665807, "learning_rate": 9.7091227641106e-06, "loss": 0.1967, "step": 1493 }, { "epoch": 0.41, "grad_norm": 2.907023087104539, "learning_rate": 9.70862724823517e-06, "loss": 0.2718, "step": 1494 }, { "epoch": 0.41, "grad_norm": 2.7535465911622943, "learning_rate": 9.708131323324117e-06, "loss": 0.2009, "step": 1495 }, { "epoch": 0.41, "grad_norm": 2.790231369678342, "learning_rate": 9.707634989420525e-06, "loss": 0.2307, "step": 1496 }, { "epoch": 0.41, "grad_norm": 2.8397414552758313, "learning_rate": 9.707138246567511e-06, "loss": 0.201, "step": 1497 }, { "epoch": 0.41, "grad_norm": 2.8221899219787123, "learning_rate": 9.706641094808225e-06, "loss": 0.2221, "step": 1498 }, { "epoch": 0.41, "grad_norm": 2.6437144048989616, "learning_rate": 9.706143534185854e-06, "loss": 0.2189, "step": 1499 }, { "epoch": 0.41, "grad_norm": 2.9433347949629662, "learning_rate": 9.705645564743624e-06, "loss": 0.223, "step": 1500 }, { "epoch": 0.41, "grad_norm": 3.0510375045265796, "learning_rate": 9.70514718652479e-06, "loss": 0.2274, "step": 1501 }, { "epoch": 0.41, "grad_norm": 2.6224486945689525, "learning_rate": 9.704648399572646e-06, "loss": 0.203, "step": 1502 }, { "epoch": 0.41, "grad_norm": 3.417391792541818, "learning_rate": 9.704149203930522e-06, "loss": 0.2481, "step": 1503 }, { "epoch": 0.41, "grad_norm": 3.4776124613410446, "learning_rate": 9.703649599641785e-06, "loss": 0.2551, "step": 1504 }, { "epoch": 0.41, "grad_norm": 2.8031745721433508, "learning_rate": 9.703149586749832e-06, "loss": 0.2453, "step": 1505 }, { "epoch": 0.41, "grad_norm": 2.5799424036544383, "learning_rate": 9.702649165298098e-06, "loss": 0.2357, "step": 1506 }, { "epoch": 0.41, "grad_norm": 2.479118377704076, "learning_rate": 9.702148335330059e-06, "loss": 0.2113, "step": 1507 }, { "epoch": 0.41, "grad_norm": 2.957523446181792, "learning_rate": 9.70164709688922e-06, "loss": 0.2212, "step": 1508 }, { "epoch": 0.41, "grad_norm": 2.9003605433388135, "learning_rate": 9.70114545001912e-06, "loss": 0.2639, "step": 1509 }, { "epoch": 0.41, "grad_norm": 2.713050325946634, "learning_rate": 9.70064339476334e-06, "loss": 0.2176, "step": 1510 }, { "epoch": 0.41, "grad_norm": 2.7310746005294435, "learning_rate": 9.700140931165494e-06, "loss": 0.2402, "step": 1511 }, { "epoch": 0.41, "grad_norm": 2.6983111111860842, "learning_rate": 9.699638059269228e-06, "loss": 0.2153, "step": 1512 }, { "epoch": 0.41, "grad_norm": 2.9712006245722553, "learning_rate": 9.699134779118226e-06, "loss": 0.2361, "step": 1513 }, { "epoch": 0.41, "grad_norm": 3.0300691635280703, "learning_rate": 9.698631090756211e-06, "loss": 0.2609, "step": 1514 }, { "epoch": 0.41, "grad_norm": 2.9187837623461137, "learning_rate": 9.698126994226937e-06, "loss": 0.2418, "step": 1515 }, { "epoch": 0.41, "grad_norm": 2.7405932958273946, "learning_rate": 9.697622489574192e-06, "loss": 0.2039, "step": 1516 }, { "epoch": 0.41, "grad_norm": 3.1415524299115414, "learning_rate": 9.697117576841804e-06, "loss": 0.2364, "step": 1517 }, { "epoch": 0.41, "grad_norm": 2.9937275186872827, "learning_rate": 9.696612256073634e-06, "loss": 0.2328, "step": 1518 }, { "epoch": 0.41, "grad_norm": 2.5861772953906312, "learning_rate": 9.69610652731358e-06, "loss": 0.2163, "step": 1519 }, { "epoch": 0.41, "grad_norm": 3.00855388669083, "learning_rate": 9.695600390605573e-06, "loss": 0.2655, "step": 1520 }, { "epoch": 0.42, "grad_norm": 2.7628333744866693, "learning_rate": 9.69509384599358e-06, "loss": 0.2055, "step": 1521 }, { "epoch": 0.42, "grad_norm": 2.843548450669024, "learning_rate": 9.694586893521607e-06, "loss": 0.2172, "step": 1522 }, { "epoch": 0.42, "grad_norm": 2.8752611462885613, "learning_rate": 9.694079533233692e-06, "loss": 0.2449, "step": 1523 }, { "epoch": 0.42, "grad_norm": 3.036470017417834, "learning_rate": 9.693571765173907e-06, "loss": 0.2448, "step": 1524 }, { "epoch": 0.42, "grad_norm": 2.607871130670095, "learning_rate": 9.693063589386361e-06, "loss": 0.2293, "step": 1525 }, { "epoch": 0.42, "grad_norm": 2.70990746902915, "learning_rate": 9.692555005915203e-06, "loss": 0.1998, "step": 1526 }, { "epoch": 0.42, "grad_norm": 2.7762196484054082, "learning_rate": 9.69204601480461e-06, "loss": 0.234, "step": 1527 }, { "epoch": 0.42, "grad_norm": 3.027966924331917, "learning_rate": 9.6915366160988e-06, "loss": 0.2416, "step": 1528 }, { "epoch": 0.42, "grad_norm": 3.210748036034756, "learning_rate": 9.691026809842021e-06, "loss": 0.2393, "step": 1529 }, { "epoch": 0.42, "grad_norm": 2.714439055952504, "learning_rate": 9.690516596078563e-06, "loss": 0.2186, "step": 1530 }, { "epoch": 0.42, "grad_norm": 2.915272765772438, "learning_rate": 9.690005974852746e-06, "loss": 0.2209, "step": 1531 }, { "epoch": 0.42, "grad_norm": 2.854124562944047, "learning_rate": 9.689494946208926e-06, "loss": 0.2073, "step": 1532 }, { "epoch": 0.42, "grad_norm": 3.169506978735728, "learning_rate": 9.688983510191498e-06, "loss": 0.2587, "step": 1533 }, { "epoch": 0.42, "grad_norm": 2.7947211413714785, "learning_rate": 9.688471666844892e-06, "loss": 0.2265, "step": 1534 }, { "epoch": 0.42, "grad_norm": 2.7473053702464667, "learning_rate": 9.687959416213568e-06, "loss": 0.2014, "step": 1535 }, { "epoch": 0.42, "grad_norm": 2.88756032851917, "learning_rate": 9.687446758342025e-06, "loss": 0.2372, "step": 1536 }, { "epoch": 0.42, "grad_norm": 2.730201404766415, "learning_rate": 9.686933693274801e-06, "loss": 0.2281, "step": 1537 }, { "epoch": 0.42, "grad_norm": 2.856505415518159, "learning_rate": 9.68642022105646e-06, "loss": 0.2426, "step": 1538 }, { "epoch": 0.42, "grad_norm": 2.439966690248097, "learning_rate": 9.685906341731612e-06, "loss": 0.1989, "step": 1539 }, { "epoch": 0.42, "grad_norm": 2.534229191798588, "learning_rate": 9.685392055344894e-06, "loss": 0.2107, "step": 1540 }, { "epoch": 0.42, "grad_norm": 2.5937486719178553, "learning_rate": 9.684877361940985e-06, "loss": 0.2458, "step": 1541 }, { "epoch": 0.42, "grad_norm": 2.9766471585050844, "learning_rate": 9.684362261564593e-06, "loss": 0.2561, "step": 1542 }, { "epoch": 0.42, "grad_norm": 2.7285978301285367, "learning_rate": 9.683846754260467e-06, "loss": 0.2318, "step": 1543 }, { "epoch": 0.42, "grad_norm": 2.6490859759036534, "learning_rate": 9.683330840073385e-06, "loss": 0.1993, "step": 1544 }, { "epoch": 0.42, "grad_norm": 2.6304394020217807, "learning_rate": 9.68281451904817e-06, "loss": 0.2009, "step": 1545 }, { "epoch": 0.42, "grad_norm": 2.82355891124952, "learning_rate": 9.682297791229668e-06, "loss": 0.2356, "step": 1546 }, { "epoch": 0.42, "grad_norm": 2.8447876460376453, "learning_rate": 9.681780656662773e-06, "loss": 0.2159, "step": 1547 }, { "epoch": 0.42, "grad_norm": 2.8854951639768993, "learning_rate": 9.681263115392403e-06, "loss": 0.2066, "step": 1548 }, { "epoch": 0.42, "grad_norm": 2.874817457093106, "learning_rate": 9.68074516746352e-06, "loss": 0.2154, "step": 1549 }, { "epoch": 0.42, "grad_norm": 2.6554675075569456, "learning_rate": 9.680226812921117e-06, "loss": 0.2369, "step": 1550 }, { "epoch": 0.42, "grad_norm": 2.8269212168665376, "learning_rate": 9.679708051810222e-06, "loss": 0.2609, "step": 1551 }, { "epoch": 0.42, "grad_norm": 2.425471588617914, "learning_rate": 9.679188884175899e-06, "loss": 0.1827, "step": 1552 }, { "epoch": 0.42, "grad_norm": 3.043539366403406, "learning_rate": 9.67866931006325e-06, "loss": 0.2295, "step": 1553 }, { "epoch": 0.42, "grad_norm": 2.944256704520571, "learning_rate": 9.67814932951741e-06, "loss": 0.2426, "step": 1554 }, { "epoch": 0.42, "grad_norm": 2.756378070320021, "learning_rate": 9.677628942583546e-06, "loss": 0.2047, "step": 1555 }, { "epoch": 0.42, "grad_norm": 2.4898495542831416, "learning_rate": 9.677108149306868e-06, "loss": 0.2083, "step": 1556 }, { "epoch": 0.43, "grad_norm": 2.992807003069182, "learning_rate": 9.676586949732616e-06, "loss": 0.2384, "step": 1557 }, { "epoch": 0.43, "grad_norm": 2.668642306173486, "learning_rate": 9.676065343906062e-06, "loss": 0.2189, "step": 1558 }, { "epoch": 0.43, "grad_norm": 2.631895639127102, "learning_rate": 9.675543331872525e-06, "loss": 0.2016, "step": 1559 }, { "epoch": 0.43, "grad_norm": 3.0890160569099017, "learning_rate": 9.675020913677345e-06, "loss": 0.2293, "step": 1560 }, { "epoch": 0.43, "grad_norm": 2.9820345126258188, "learning_rate": 9.67449808936591e-06, "loss": 0.2375, "step": 1561 }, { "epoch": 0.43, "grad_norm": 2.6081883274574142, "learning_rate": 9.673974858983632e-06, "loss": 0.2214, "step": 1562 }, { "epoch": 0.43, "grad_norm": 3.013177033536362, "learning_rate": 9.673451222575966e-06, "loss": 0.2515, "step": 1563 }, { "epoch": 0.43, "grad_norm": 2.9825613183054993, "learning_rate": 9.672927180188401e-06, "loss": 0.2605, "step": 1564 }, { "epoch": 0.43, "grad_norm": 3.1700277369011536, "learning_rate": 9.67240273186646e-06, "loss": 0.2524, "step": 1565 }, { "epoch": 0.43, "grad_norm": 2.5923557566562927, "learning_rate": 9.6718778776557e-06, "loss": 0.2158, "step": 1566 }, { "epoch": 0.43, "grad_norm": 2.759605462143463, "learning_rate": 9.671352617601714e-06, "loss": 0.2533, "step": 1567 }, { "epoch": 0.43, "grad_norm": 2.5169542079163154, "learning_rate": 9.670826951750136e-06, "loss": 0.2398, "step": 1568 }, { "epoch": 0.43, "grad_norm": 2.7234993794414883, "learning_rate": 9.670300880146626e-06, "loss": 0.2121, "step": 1569 }, { "epoch": 0.43, "grad_norm": 2.9397000374259643, "learning_rate": 9.669774402836883e-06, "loss": 0.2187, "step": 1570 }, { "epoch": 0.43, "grad_norm": 2.7198101335816003, "learning_rate": 9.669247519866645e-06, "loss": 0.2316, "step": 1571 }, { "epoch": 0.43, "grad_norm": 3.008983223363173, "learning_rate": 9.66872023128168e-06, "loss": 0.2436, "step": 1572 }, { "epoch": 0.43, "grad_norm": 2.9451381597613864, "learning_rate": 9.668192537127793e-06, "loss": 0.2249, "step": 1573 }, { "epoch": 0.43, "grad_norm": 2.955902351616932, "learning_rate": 9.667664437450825e-06, "loss": 0.2404, "step": 1574 }, { "epoch": 0.43, "grad_norm": 2.903320825647778, "learning_rate": 9.667135932296653e-06, "loss": 0.228, "step": 1575 }, { "epoch": 0.43, "grad_norm": 2.4170988950285763, "learning_rate": 9.666607021711185e-06, "loss": 0.2205, "step": 1576 }, { "epoch": 0.43, "grad_norm": 2.9882596573129945, "learning_rate": 9.666077705740368e-06, "loss": 0.2564, "step": 1577 }, { "epoch": 0.43, "grad_norm": 2.5911391720617365, "learning_rate": 9.665547984430186e-06, "loss": 0.2126, "step": 1578 }, { "epoch": 0.43, "grad_norm": 2.8208683827079954, "learning_rate": 9.665017857826654e-06, "loss": 0.2428, "step": 1579 }, { "epoch": 0.43, "grad_norm": 3.0019933775974867, "learning_rate": 9.664487325975822e-06, "loss": 0.2649, "step": 1580 }, { "epoch": 0.43, "grad_norm": 2.814295091275241, "learning_rate": 9.663956388923779e-06, "loss": 0.2386, "step": 1581 }, { "epoch": 0.43, "grad_norm": 2.3918956147036896, "learning_rate": 9.663425046716648e-06, "loss": 0.1696, "step": 1582 }, { "epoch": 0.43, "grad_norm": 2.671806418716493, "learning_rate": 9.662893299400585e-06, "loss": 0.2073, "step": 1583 }, { "epoch": 0.43, "grad_norm": 2.6733044625154556, "learning_rate": 9.66236114702178e-06, "loss": 0.2077, "step": 1584 }, { "epoch": 0.43, "grad_norm": 3.212690294082634, "learning_rate": 9.661828589626465e-06, "loss": 0.2163, "step": 1585 }, { "epoch": 0.43, "grad_norm": 2.9582787369390062, "learning_rate": 9.661295627260901e-06, "loss": 0.236, "step": 1586 }, { "epoch": 0.43, "grad_norm": 2.7211921678156132, "learning_rate": 9.660762259971386e-06, "loss": 0.2061, "step": 1587 }, { "epoch": 0.43, "grad_norm": 3.045993606123908, "learning_rate": 9.660228487804254e-06, "loss": 0.2402, "step": 1588 }, { "epoch": 0.43, "grad_norm": 2.6107671378707376, "learning_rate": 9.659694310805874e-06, "loss": 0.2166, "step": 1589 }, { "epoch": 0.43, "grad_norm": 2.854306288419431, "learning_rate": 9.659159729022649e-06, "loss": 0.2114, "step": 1590 }, { "epoch": 0.43, "grad_norm": 3.1905254612518243, "learning_rate": 9.658624742501018e-06, "loss": 0.2499, "step": 1591 }, { "epoch": 0.43, "grad_norm": 2.7649113408904396, "learning_rate": 9.658089351287452e-06, "loss": 0.205, "step": 1592 }, { "epoch": 0.43, "grad_norm": 2.80746686082814, "learning_rate": 9.657553555428464e-06, "loss": 0.1937, "step": 1593 }, { "epoch": 0.44, "grad_norm": 2.808417049743047, "learning_rate": 9.657017354970597e-06, "loss": 0.2642, "step": 1594 }, { "epoch": 0.44, "grad_norm": 2.7248858319197664, "learning_rate": 9.65648074996043e-06, "loss": 0.232, "step": 1595 }, { "epoch": 0.44, "grad_norm": 2.5268189226850692, "learning_rate": 9.655943740444579e-06, "loss": 0.213, "step": 1596 }, { "epoch": 0.44, "grad_norm": 2.6470000281531716, "learning_rate": 9.655406326469692e-06, "loss": 0.2374, "step": 1597 }, { "epoch": 0.44, "grad_norm": 2.728736640772866, "learning_rate": 9.654868508082455e-06, "loss": 0.2287, "step": 1598 }, { "epoch": 0.44, "grad_norm": 2.7662517925685073, "learning_rate": 9.654330285329586e-06, "loss": 0.2324, "step": 1599 }, { "epoch": 0.44, "grad_norm": 2.8220906486926056, "learning_rate": 9.653791658257843e-06, "loss": 0.2254, "step": 1600 }, { "epoch": 0.44, "grad_norm": 2.5777244323724395, "learning_rate": 9.653252626914014e-06, "loss": 0.23, "step": 1601 }, { "epoch": 0.44, "grad_norm": 2.8844888343714232, "learning_rate": 9.652713191344925e-06, "loss": 0.2502, "step": 1602 }, { "epoch": 0.44, "grad_norm": 2.807909610431765, "learning_rate": 9.652173351597435e-06, "loss": 0.2084, "step": 1603 }, { "epoch": 0.44, "grad_norm": 2.3886962429509206, "learning_rate": 9.651633107718443e-06, "loss": 0.2113, "step": 1604 }, { "epoch": 0.44, "grad_norm": 2.6827822906980847, "learning_rate": 9.651092459754879e-06, "loss": 0.208, "step": 1605 }, { "epoch": 0.44, "grad_norm": 2.7765037544936737, "learning_rate": 9.650551407753705e-06, "loss": 0.227, "step": 1606 }, { "epoch": 0.44, "grad_norm": 2.858755730947863, "learning_rate": 9.650009951761926e-06, "loss": 0.2541, "step": 1607 }, { "epoch": 0.44, "grad_norm": 2.5849049792674657, "learning_rate": 9.649468091826575e-06, "loss": 0.2027, "step": 1608 }, { "epoch": 0.44, "grad_norm": 2.930380377264086, "learning_rate": 9.648925827994725e-06, "loss": 0.2327, "step": 1609 }, { "epoch": 0.44, "grad_norm": 2.3570562933030295, "learning_rate": 9.64838316031348e-06, "loss": 0.1835, "step": 1610 }, { "epoch": 0.44, "grad_norm": 2.6838342972952565, "learning_rate": 9.647840088829984e-06, "loss": 0.1922, "step": 1611 }, { "epoch": 0.44, "grad_norm": 2.932592011487064, "learning_rate": 9.647296613591411e-06, "loss": 0.2225, "step": 1612 }, { "epoch": 0.44, "grad_norm": 2.7003931785238957, "learning_rate": 9.646752734644974e-06, "loss": 0.1956, "step": 1613 }, { "epoch": 0.44, "grad_norm": 2.656601331116525, "learning_rate": 9.646208452037919e-06, "loss": 0.223, "step": 1614 }, { "epoch": 0.44, "grad_norm": 2.805961420413957, "learning_rate": 9.645663765817528e-06, "loss": 0.1947, "step": 1615 }, { "epoch": 0.44, "grad_norm": 2.718423148939623, "learning_rate": 9.645118676031115e-06, "loss": 0.2278, "step": 1616 }, { "epoch": 0.44, "grad_norm": 3.023230417850136, "learning_rate": 9.644573182726035e-06, "loss": 0.2083, "step": 1617 }, { "epoch": 0.44, "grad_norm": 2.648343553392819, "learning_rate": 9.644027285949673e-06, "loss": 0.2267, "step": 1618 }, { "epoch": 0.44, "grad_norm": 2.896398420833112, "learning_rate": 9.64348098574945e-06, "loss": 0.2563, "step": 1619 }, { "epoch": 0.44, "grad_norm": 2.5017012795932616, "learning_rate": 9.642934282172824e-06, "loss": 0.1827, "step": 1620 }, { "epoch": 0.44, "grad_norm": 2.7858196091171687, "learning_rate": 9.642387175267285e-06, "loss": 0.2329, "step": 1621 }, { "epoch": 0.44, "grad_norm": 2.8165940319032696, "learning_rate": 9.641839665080363e-06, "loss": 0.2383, "step": 1622 }, { "epoch": 0.44, "grad_norm": 2.806771898505537, "learning_rate": 9.641291751659618e-06, "loss": 0.1852, "step": 1623 }, { "epoch": 0.44, "grad_norm": 2.7910379629398294, "learning_rate": 9.640743435052647e-06, "loss": 0.2341, "step": 1624 }, { "epoch": 0.44, "grad_norm": 2.9556565949101397, "learning_rate": 9.640194715307083e-06, "loss": 0.2312, "step": 1625 }, { "epoch": 0.44, "grad_norm": 2.7559178244254405, "learning_rate": 9.63964559247059e-06, "loss": 0.2431, "step": 1626 }, { "epoch": 0.44, "grad_norm": 2.8777072599656943, "learning_rate": 9.639096066590874e-06, "loss": 0.2141, "step": 1627 }, { "epoch": 0.44, "grad_norm": 2.886249274960757, "learning_rate": 9.638546137715668e-06, "loss": 0.2404, "step": 1628 }, { "epoch": 0.44, "grad_norm": 2.7941362488188304, "learning_rate": 9.637995805892746e-06, "loss": 0.2312, "step": 1629 }, { "epoch": 0.44, "grad_norm": 2.4425025204901574, "learning_rate": 9.637445071169917e-06, "loss": 0.2023, "step": 1630 }, { "epoch": 0.45, "grad_norm": 2.6747070071904537, "learning_rate": 9.63689393359502e-06, "loss": 0.2143, "step": 1631 }, { "epoch": 0.45, "grad_norm": 2.7787332078860385, "learning_rate": 9.636342393215931e-06, "loss": 0.2214, "step": 1632 }, { "epoch": 0.45, "grad_norm": 2.7512638548427333, "learning_rate": 9.635790450080566e-06, "loss": 0.24, "step": 1633 }, { "epoch": 0.45, "grad_norm": 2.9045180141424494, "learning_rate": 9.63523810423687e-06, "loss": 0.2277, "step": 1634 }, { "epoch": 0.45, "grad_norm": 2.6887185550995873, "learning_rate": 9.634685355732823e-06, "loss": 0.1834, "step": 1635 }, { "epoch": 0.45, "grad_norm": 2.7723511221897974, "learning_rate": 9.634132204616444e-06, "loss": 0.2401, "step": 1636 }, { "epoch": 0.45, "grad_norm": 2.811919194203948, "learning_rate": 9.633578650935786e-06, "loss": 0.2275, "step": 1637 }, { "epoch": 0.45, "grad_norm": 2.728784944560048, "learning_rate": 9.633024694738934e-06, "loss": 0.228, "step": 1638 }, { "epoch": 0.45, "grad_norm": 2.59291729228635, "learning_rate": 9.632470336074009e-06, "loss": 0.2012, "step": 1639 }, { "epoch": 0.45, "grad_norm": 2.77904935255569, "learning_rate": 9.631915574989171e-06, "loss": 0.2443, "step": 1640 }, { "epoch": 0.45, "grad_norm": 3.2218525147178343, "learning_rate": 9.631360411532609e-06, "loss": 0.2696, "step": 1641 }, { "epoch": 0.45, "grad_norm": 2.920292090498311, "learning_rate": 9.63080484575255e-06, "loss": 0.2459, "step": 1642 }, { "epoch": 0.45, "grad_norm": 2.5625603959113703, "learning_rate": 9.630248877697259e-06, "loss": 0.2307, "step": 1643 }, { "epoch": 0.45, "grad_norm": 2.5111587966174493, "learning_rate": 9.629692507415028e-06, "loss": 0.1924, "step": 1644 }, { "epoch": 0.45, "grad_norm": 2.9221839124698574, "learning_rate": 9.62913573495419e-06, "loss": 0.239, "step": 1645 }, { "epoch": 0.45, "grad_norm": 2.865562667929123, "learning_rate": 9.628578560363113e-06, "loss": 0.233, "step": 1646 }, { "epoch": 0.45, "grad_norm": 2.8832287396785765, "learning_rate": 9.628020983690197e-06, "loss": 0.2277, "step": 1647 }, { "epoch": 0.45, "grad_norm": 2.6577319695219286, "learning_rate": 9.627463004983877e-06, "loss": 0.2019, "step": 1648 }, { "epoch": 0.45, "grad_norm": 2.6324502101358576, "learning_rate": 9.626904624292629e-06, "loss": 0.2289, "step": 1649 }, { "epoch": 0.45, "grad_norm": 2.7031058190857595, "learning_rate": 9.626345841664953e-06, "loss": 0.2187, "step": 1650 }, { "epoch": 0.45, "grad_norm": 2.319627220530469, "learning_rate": 9.625786657149396e-06, "loss": 0.1619, "step": 1651 }, { "epoch": 0.45, "grad_norm": 2.9793494221380317, "learning_rate": 9.625227070794529e-06, "loss": 0.2328, "step": 1652 }, { "epoch": 0.45, "grad_norm": 3.0654668294341554, "learning_rate": 9.624667082648966e-06, "loss": 0.2422, "step": 1653 }, { "epoch": 0.45, "grad_norm": 2.816926895241324, "learning_rate": 9.624106692761354e-06, "loss": 0.238, "step": 1654 }, { "epoch": 0.45, "grad_norm": 2.7852834344810513, "learning_rate": 9.62354590118037e-06, "loss": 0.242, "step": 1655 }, { "epoch": 0.45, "grad_norm": 2.9036212936149406, "learning_rate": 9.622984707954732e-06, "loss": 0.2352, "step": 1656 }, { "epoch": 0.45, "grad_norm": 2.44481206178112, "learning_rate": 9.62242311313319e-06, "loss": 0.2284, "step": 1657 }, { "epoch": 0.45, "grad_norm": 2.365521546707559, "learning_rate": 9.621861116764529e-06, "loss": 0.1952, "step": 1658 }, { "epoch": 0.45, "grad_norm": 2.5175533394484084, "learning_rate": 9.621298718897569e-06, "loss": 0.2103, "step": 1659 }, { "epoch": 0.45, "grad_norm": 3.103193049363998, "learning_rate": 9.620735919581168e-06, "loss": 0.2683, "step": 1660 }, { "epoch": 0.45, "grad_norm": 2.714802773138769, "learning_rate": 9.620172718864213e-06, "loss": 0.2482, "step": 1661 }, { "epoch": 0.45, "grad_norm": 2.5411590433396687, "learning_rate": 9.619609116795628e-06, "loss": 0.2071, "step": 1662 }, { "epoch": 0.45, "grad_norm": 2.6126050466901454, "learning_rate": 9.619045113424376e-06, "loss": 0.2201, "step": 1663 }, { "epoch": 0.45, "grad_norm": 2.368055393964109, "learning_rate": 9.61848070879945e-06, "loss": 0.1888, "step": 1664 }, { "epoch": 0.45, "grad_norm": 2.7815542197529353, "learning_rate": 9.617915902969879e-06, "loss": 0.2612, "step": 1665 }, { "epoch": 0.45, "grad_norm": 2.543535657927365, "learning_rate": 9.61735069598473e-06, "loss": 0.2546, "step": 1666 }, { "epoch": 0.46, "grad_norm": 2.7180204939689463, "learning_rate": 9.616785087893099e-06, "loss": 0.2204, "step": 1667 }, { "epoch": 0.46, "grad_norm": 3.101084274457579, "learning_rate": 9.61621907874412e-06, "loss": 0.281, "step": 1668 }, { "epoch": 0.46, "grad_norm": 2.607747626404295, "learning_rate": 9.615652668586965e-06, "loss": 0.2186, "step": 1669 }, { "epoch": 0.46, "grad_norm": 3.213761805253613, "learning_rate": 9.615085857470835e-06, "loss": 0.2085, "step": 1670 }, { "epoch": 0.46, "grad_norm": 2.6923825329172315, "learning_rate": 9.61451864544497e-06, "loss": 0.2249, "step": 1671 }, { "epoch": 0.46, "grad_norm": 2.762454031205449, "learning_rate": 9.613951032558641e-06, "loss": 0.2218, "step": 1672 }, { "epoch": 0.46, "grad_norm": 2.5951883320325795, "learning_rate": 9.613383018861159e-06, "loss": 0.2274, "step": 1673 }, { "epoch": 0.46, "grad_norm": 2.736871570936625, "learning_rate": 9.612814604401868e-06, "loss": 0.2099, "step": 1674 }, { "epoch": 0.46, "grad_norm": 2.5091108176230206, "learning_rate": 9.61224578923014e-06, "loss": 0.2109, "step": 1675 }, { "epoch": 0.46, "grad_norm": 2.608786554200097, "learning_rate": 9.611676573395394e-06, "loss": 0.1975, "step": 1676 }, { "epoch": 0.46, "grad_norm": 2.6353856472027637, "learning_rate": 9.611106956947073e-06, "loss": 0.2314, "step": 1677 }, { "epoch": 0.46, "grad_norm": 3.1613038342922284, "learning_rate": 9.610536939934663e-06, "loss": 0.2529, "step": 1678 }, { "epoch": 0.46, "grad_norm": 2.555757455711037, "learning_rate": 9.609966522407678e-06, "loss": 0.1815, "step": 1679 }, { "epoch": 0.46, "grad_norm": 3.2152204753078175, "learning_rate": 9.609395704415672e-06, "loss": 0.2457, "step": 1680 }, { "epoch": 0.46, "grad_norm": 2.827473896740029, "learning_rate": 9.608824486008228e-06, "loss": 0.2327, "step": 1681 }, { "epoch": 0.46, "grad_norm": 2.6376256633618986, "learning_rate": 9.608252867234972e-06, "loss": 0.2351, "step": 1682 }, { "epoch": 0.46, "grad_norm": 2.6236339696338247, "learning_rate": 9.607680848145557e-06, "loss": 0.2151, "step": 1683 }, { "epoch": 0.46, "grad_norm": 2.8409205946611094, "learning_rate": 9.607108428789677e-06, "loss": 0.2346, "step": 1684 }, { "epoch": 0.46, "grad_norm": 2.717881716079471, "learning_rate": 9.606535609217054e-06, "loss": 0.2186, "step": 1685 }, { "epoch": 0.46, "grad_norm": 2.7396456134689036, "learning_rate": 9.60596238947745e-06, "loss": 0.2328, "step": 1686 }, { "epoch": 0.46, "grad_norm": 3.100764757098075, "learning_rate": 9.605388769620663e-06, "loss": 0.2555, "step": 1687 }, { "epoch": 0.46, "grad_norm": 2.4749698347744378, "learning_rate": 9.604814749696519e-06, "loss": 0.1782, "step": 1688 }, { "epoch": 0.46, "grad_norm": 2.770567729618977, "learning_rate": 9.604240329754883e-06, "loss": 0.2484, "step": 1689 }, { "epoch": 0.46, "grad_norm": 2.42417640332674, "learning_rate": 9.603665509845657e-06, "loss": 0.1926, "step": 1690 }, { "epoch": 0.46, "grad_norm": 2.519824351316186, "learning_rate": 9.603090290018774e-06, "loss": 0.2179, "step": 1691 }, { "epoch": 0.46, "grad_norm": 3.2505042582116315, "learning_rate": 9.602514670324204e-06, "loss": 0.2687, "step": 1692 }, { "epoch": 0.46, "grad_norm": 2.849312214312834, "learning_rate": 9.601938650811949e-06, "loss": 0.2332, "step": 1693 }, { "epoch": 0.46, "grad_norm": 2.872267799669223, "learning_rate": 9.601362231532047e-06, "loss": 0.2251, "step": 1694 }, { "epoch": 0.46, "grad_norm": 2.5632923872826976, "learning_rate": 9.600785412534575e-06, "loss": 0.18, "step": 1695 }, { "epoch": 0.46, "grad_norm": 3.1272359230488287, "learning_rate": 9.600208193869638e-06, "loss": 0.2634, "step": 1696 }, { "epoch": 0.46, "grad_norm": 2.720416841863935, "learning_rate": 9.599630575587378e-06, "loss": 0.2307, "step": 1697 }, { "epoch": 0.46, "grad_norm": 2.4673292796001958, "learning_rate": 9.599052557737973e-06, "loss": 0.2184, "step": 1698 }, { "epoch": 0.46, "grad_norm": 2.8346451288445516, "learning_rate": 9.598474140371637e-06, "loss": 0.2562, "step": 1699 }, { "epoch": 0.46, "grad_norm": 2.548638751653133, "learning_rate": 9.597895323538615e-06, "loss": 0.1979, "step": 1700 }, { "epoch": 0.46, "grad_norm": 2.742428112530759, "learning_rate": 9.597316107289187e-06, "loss": 0.1939, "step": 1701 }, { "epoch": 0.46, "grad_norm": 2.6337741204738876, "learning_rate": 9.596736491673674e-06, "loss": 0.1961, "step": 1702 }, { "epoch": 0.46, "grad_norm": 2.6800166273471153, "learning_rate": 9.596156476742419e-06, "loss": 0.2497, "step": 1703 }, { "epoch": 0.47, "grad_norm": 2.868938335305424, "learning_rate": 9.595576062545817e-06, "loss": 0.2305, "step": 1704 }, { "epoch": 0.47, "grad_norm": 2.600370957828557, "learning_rate": 9.59499524913428e-06, "loss": 0.235, "step": 1705 }, { "epoch": 0.47, "grad_norm": 3.263633374493889, "learning_rate": 9.594414036558268e-06, "loss": 0.2204, "step": 1706 }, { "epoch": 0.47, "grad_norm": 2.7651879039745113, "learning_rate": 9.593832424868271e-06, "loss": 0.224, "step": 1707 }, { "epoch": 0.47, "grad_norm": 3.009188856854406, "learning_rate": 9.59325041411481e-06, "loss": 0.2195, "step": 1708 }, { "epoch": 0.47, "grad_norm": 2.8061359410692273, "learning_rate": 9.592668004348443e-06, "loss": 0.2083, "step": 1709 }, { "epoch": 0.47, "grad_norm": 2.6936061605824193, "learning_rate": 9.592085195619767e-06, "loss": 0.2064, "step": 1710 }, { "epoch": 0.47, "grad_norm": 2.930877583884321, "learning_rate": 9.59150198797941e-06, "loss": 0.2395, "step": 1711 }, { "epoch": 0.47, "grad_norm": 2.6239550359661443, "learning_rate": 9.590918381478034e-06, "loss": 0.2228, "step": 1712 }, { "epoch": 0.47, "grad_norm": 3.7394403666584504, "learning_rate": 9.590334376166334e-06, "loss": 0.235, "step": 1713 }, { "epoch": 0.47, "grad_norm": 3.1226436039606473, "learning_rate": 9.589749972095048e-06, "loss": 0.2177, "step": 1714 }, { "epoch": 0.47, "grad_norm": 2.549593904820042, "learning_rate": 9.589165169314938e-06, "loss": 0.207, "step": 1715 }, { "epoch": 0.47, "grad_norm": 2.8047872809286365, "learning_rate": 9.588579967876806e-06, "loss": 0.2455, "step": 1716 }, { "epoch": 0.47, "grad_norm": 3.152412334473281, "learning_rate": 9.58799436783149e-06, "loss": 0.2338, "step": 1717 }, { "epoch": 0.47, "grad_norm": 2.6383426670233634, "learning_rate": 9.58740836922986e-06, "loss": 0.201, "step": 1718 }, { "epoch": 0.47, "grad_norm": 2.5664408059758856, "learning_rate": 9.586821972122822e-06, "loss": 0.1984, "step": 1719 }, { "epoch": 0.47, "grad_norm": 2.616884817499784, "learning_rate": 9.586235176561313e-06, "loss": 0.2146, "step": 1720 }, { "epoch": 0.47, "grad_norm": 2.7720308313210884, "learning_rate": 9.58564798259631e-06, "loss": 0.2363, "step": 1721 }, { "epoch": 0.47, "grad_norm": 2.519023353478564, "learning_rate": 9.585060390278824e-06, "loss": 0.1855, "step": 1722 }, { "epoch": 0.47, "grad_norm": 3.1360082248795536, "learning_rate": 9.584472399659895e-06, "loss": 0.2539, "step": 1723 }, { "epoch": 0.47, "grad_norm": 2.6233907807963837, "learning_rate": 9.583884010790605e-06, "loss": 0.2038, "step": 1724 }, { "epoch": 0.47, "grad_norm": 2.799187463421666, "learning_rate": 9.583295223722062e-06, "loss": 0.2399, "step": 1725 }, { "epoch": 0.47, "grad_norm": 2.7401234009096167, "learning_rate": 9.582706038505418e-06, "loss": 0.2465, "step": 1726 }, { "epoch": 0.47, "grad_norm": 2.402983256992335, "learning_rate": 9.582116455191855e-06, "loss": 0.2172, "step": 1727 }, { "epoch": 0.47, "grad_norm": 2.6216783934659604, "learning_rate": 9.581526473832585e-06, "loss": 0.247, "step": 1728 }, { "epoch": 0.47, "grad_norm": 3.0130415521406606, "learning_rate": 9.580936094478865e-06, "loss": 0.22, "step": 1729 }, { "epoch": 0.47, "grad_norm": 2.6757854479097527, "learning_rate": 9.58034531718198e-06, "loss": 0.2159, "step": 1730 }, { "epoch": 0.47, "grad_norm": 2.719381500605708, "learning_rate": 9.579754141993247e-06, "loss": 0.2212, "step": 1731 }, { "epoch": 0.47, "grad_norm": 2.9003634265410105, "learning_rate": 9.579162568964025e-06, "loss": 0.2039, "step": 1732 }, { "epoch": 0.47, "grad_norm": 2.818688851805939, "learning_rate": 9.578570598145702e-06, "loss": 0.2317, "step": 1733 }, { "epoch": 0.47, "grad_norm": 2.798901368085829, "learning_rate": 9.577978229589702e-06, "loss": 0.2015, "step": 1734 }, { "epoch": 0.47, "grad_norm": 2.759259072866002, "learning_rate": 9.577385463347481e-06, "loss": 0.2292, "step": 1735 }, { "epoch": 0.47, "grad_norm": 3.0502353983735446, "learning_rate": 9.576792299470537e-06, "loss": 0.2408, "step": 1736 }, { "epoch": 0.47, "grad_norm": 2.756578316101421, "learning_rate": 9.576198738010396e-06, "loss": 0.1682, "step": 1737 }, { "epoch": 0.47, "grad_norm": 3.011501895584955, "learning_rate": 9.57560477901862e-06, "loss": 0.2237, "step": 1738 }, { "epoch": 0.47, "grad_norm": 2.735995439685285, "learning_rate": 9.575010422546805e-06, "loss": 0.2332, "step": 1739 }, { "epoch": 0.48, "grad_norm": 2.810411185932834, "learning_rate": 9.574415668646584e-06, "loss": 0.2217, "step": 1740 }, { "epoch": 0.48, "grad_norm": 2.6521338537732992, "learning_rate": 9.573820517369623e-06, "loss": 0.2205, "step": 1741 }, { "epoch": 0.48, "grad_norm": 2.6750502634585613, "learning_rate": 9.57322496876762e-06, "loss": 0.2126, "step": 1742 }, { "epoch": 0.48, "grad_norm": 2.644547247288957, "learning_rate": 9.572629022892312e-06, "loss": 0.2061, "step": 1743 }, { "epoch": 0.48, "grad_norm": 2.5504708251215007, "learning_rate": 9.572032679795469e-06, "loss": 0.2204, "step": 1744 }, { "epoch": 0.48, "grad_norm": 4.419006548996466, "learning_rate": 9.571435939528893e-06, "loss": 0.2386, "step": 1745 }, { "epoch": 0.48, "grad_norm": 2.6413667336199613, "learning_rate": 9.570838802144425e-06, "loss": 0.1994, "step": 1746 }, { "epoch": 0.48, "grad_norm": 3.0519880627966605, "learning_rate": 9.570241267693935e-06, "loss": 0.2604, "step": 1747 }, { "epoch": 0.48, "grad_norm": 2.7924615919702345, "learning_rate": 9.569643336229334e-06, "loss": 0.2232, "step": 1748 }, { "epoch": 0.48, "grad_norm": 2.507699391405858, "learning_rate": 9.569045007802558e-06, "loss": 0.2198, "step": 1749 }, { "epoch": 0.48, "grad_norm": 2.868233561057278, "learning_rate": 9.568446282465592e-06, "loss": 0.2359, "step": 1750 }, { "epoch": 0.48, "grad_norm": 3.3797216668233028, "learning_rate": 9.567847160270438e-06, "loss": 0.2217, "step": 1751 }, { "epoch": 0.48, "grad_norm": 2.637712540644408, "learning_rate": 9.567247641269148e-06, "loss": 0.1944, "step": 1752 }, { "epoch": 0.48, "grad_norm": 2.8500124457445506, "learning_rate": 9.566647725513799e-06, "loss": 0.2313, "step": 1753 }, { "epoch": 0.48, "grad_norm": 2.69311795556285, "learning_rate": 9.566047413056506e-06, "loss": 0.1994, "step": 1754 }, { "epoch": 0.48, "grad_norm": 2.663547303732506, "learning_rate": 9.565446703949417e-06, "loss": 0.2259, "step": 1755 }, { "epoch": 0.48, "grad_norm": 2.705919407798083, "learning_rate": 9.564845598244717e-06, "loss": 0.2496, "step": 1756 }, { "epoch": 0.48, "grad_norm": 2.4229237870561717, "learning_rate": 9.564244095994621e-06, "loss": 0.1889, "step": 1757 }, { "epoch": 0.48, "grad_norm": 2.3953847492794917, "learning_rate": 9.563642197251382e-06, "loss": 0.1879, "step": 1758 }, { "epoch": 0.48, "grad_norm": 2.5483154714727347, "learning_rate": 9.563039902067288e-06, "loss": 0.1781, "step": 1759 }, { "epoch": 0.48, "grad_norm": 2.6083217487177626, "learning_rate": 9.56243721049466e-06, "loss": 0.2401, "step": 1760 }, { "epoch": 0.48, "grad_norm": 2.4330218004324977, "learning_rate": 9.561834122585854e-06, "loss": 0.2033, "step": 1761 }, { "epoch": 0.48, "grad_norm": 2.647433185175119, "learning_rate": 9.561230638393255e-06, "loss": 0.1895, "step": 1762 }, { "epoch": 0.48, "grad_norm": 2.351384241310298, "learning_rate": 9.560626757969294e-06, "loss": 0.1909, "step": 1763 }, { "epoch": 0.48, "grad_norm": 2.7308059895075685, "learning_rate": 9.560022481366424e-06, "loss": 0.2479, "step": 1764 }, { "epoch": 0.48, "grad_norm": 3.099206116735547, "learning_rate": 9.559417808637144e-06, "loss": 0.2058, "step": 1765 }, { "epoch": 0.48, "grad_norm": 2.7411520102567613, "learning_rate": 9.558812739833976e-06, "loss": 0.2017, "step": 1766 }, { "epoch": 0.48, "grad_norm": 2.6059707581758467, "learning_rate": 9.558207275009484e-06, "loss": 0.224, "step": 1767 }, { "epoch": 0.48, "grad_norm": 2.699417109489021, "learning_rate": 9.557601414216266e-06, "loss": 0.2174, "step": 1768 }, { "epoch": 0.48, "grad_norm": 3.2704084779433273, "learning_rate": 9.55699515750695e-06, "loss": 0.2148, "step": 1769 }, { "epoch": 0.48, "grad_norm": 2.5127217709894043, "learning_rate": 9.556388504934205e-06, "loss": 0.1815, "step": 1770 }, { "epoch": 0.48, "grad_norm": 2.9608885098882243, "learning_rate": 9.555781456550725e-06, "loss": 0.2227, "step": 1771 }, { "epoch": 0.48, "grad_norm": 2.742090598088731, "learning_rate": 9.55517401240925e-06, "loss": 0.2221, "step": 1772 }, { "epoch": 0.48, "grad_norm": 2.545091765276031, "learning_rate": 9.554566172562543e-06, "loss": 0.2099, "step": 1773 }, { "epoch": 0.48, "grad_norm": 2.982152951649236, "learning_rate": 9.55395793706341e-06, "loss": 0.277, "step": 1774 }, { "epoch": 0.48, "grad_norm": 2.5354385601712255, "learning_rate": 9.553349305964687e-06, "loss": 0.225, "step": 1775 }, { "epoch": 0.48, "grad_norm": 2.79908848828513, "learning_rate": 9.552740279319245e-06, "loss": 0.2381, "step": 1776 }, { "epoch": 0.49, "grad_norm": 2.553719888278797, "learning_rate": 9.55213085717999e-06, "loss": 0.2207, "step": 1777 }, { "epoch": 0.49, "grad_norm": 2.605392576690462, "learning_rate": 9.551521039599863e-06, "loss": 0.2204, "step": 1778 }, { "epoch": 0.49, "grad_norm": 2.5240178610291344, "learning_rate": 9.550910826631838e-06, "loss": 0.1899, "step": 1779 }, { "epoch": 0.49, "grad_norm": 2.846364174813725, "learning_rate": 9.550300218328925e-06, "loss": 0.219, "step": 1780 }, { "epoch": 0.49, "grad_norm": 2.8963255830032133, "learning_rate": 9.549689214744164e-06, "loss": 0.243, "step": 1781 }, { "epoch": 0.49, "grad_norm": 2.795185008332951, "learning_rate": 9.549077815930636e-06, "loss": 0.2629, "step": 1782 }, { "epoch": 0.49, "grad_norm": 2.8229099089245624, "learning_rate": 9.548466021941449e-06, "loss": 0.2513, "step": 1783 }, { "epoch": 0.49, "grad_norm": 3.2482969658252094, "learning_rate": 9.547853832829755e-06, "loss": 0.2531, "step": 1784 }, { "epoch": 0.49, "grad_norm": 2.5229222999636374, "learning_rate": 9.547241248648727e-06, "loss": 0.2142, "step": 1785 }, { "epoch": 0.49, "grad_norm": 2.3979692335565073, "learning_rate": 9.546628269451585e-06, "loss": 0.1935, "step": 1786 }, { "epoch": 0.49, "grad_norm": 2.754020006290599, "learning_rate": 9.546014895291578e-06, "loss": 0.2118, "step": 1787 }, { "epoch": 0.49, "grad_norm": 2.7784641802939127, "learning_rate": 9.54540112622199e-06, "loss": 0.2547, "step": 1788 }, { "epoch": 0.49, "grad_norm": 2.731180462953888, "learning_rate": 9.544786962296132e-06, "loss": 0.2347, "step": 1789 }, { "epoch": 0.49, "grad_norm": 3.040592126685657, "learning_rate": 9.544172403567365e-06, "loss": 0.1901, "step": 1790 }, { "epoch": 0.49, "grad_norm": 2.5571708814237177, "learning_rate": 9.543557450089071e-06, "loss": 0.2163, "step": 1791 }, { "epoch": 0.49, "grad_norm": 11.484015609928097, "learning_rate": 9.54294210191467e-06, "loss": 0.2885, "step": 1792 }, { "epoch": 0.49, "grad_norm": 2.324195827359162, "learning_rate": 9.542326359097619e-06, "loss": 0.1611, "step": 1793 }, { "epoch": 0.49, "grad_norm": 2.590716916693087, "learning_rate": 9.541710221691407e-06, "loss": 0.2202, "step": 1794 }, { "epoch": 0.49, "grad_norm": 2.923240766576748, "learning_rate": 9.541093689749554e-06, "loss": 0.2251, "step": 1795 }, { "epoch": 0.49, "grad_norm": 3.496219520395009, "learning_rate": 9.540476763325623e-06, "loss": 0.25, "step": 1796 }, { "epoch": 0.49, "grad_norm": 2.896660954214899, "learning_rate": 9.539859442473203e-06, "loss": 0.2337, "step": 1797 }, { "epoch": 0.49, "grad_norm": 2.7928843265518504, "learning_rate": 9.539241727245921e-06, "loss": 0.2324, "step": 1798 }, { "epoch": 0.49, "grad_norm": 2.7880172478124217, "learning_rate": 9.538623617697437e-06, "loss": 0.2095, "step": 1799 }, { "epoch": 0.49, "grad_norm": 5.472749054959661, "learning_rate": 9.538005113881445e-06, "loss": 0.2659, "step": 1800 }, { "epoch": 0.49, "grad_norm": 2.670830396455639, "learning_rate": 9.537386215851677e-06, "loss": 0.2332, "step": 1801 }, { "epoch": 0.49, "grad_norm": 3.080030042416917, "learning_rate": 9.536766923661894e-06, "loss": 0.2329, "step": 1802 }, { "epoch": 0.49, "grad_norm": 2.7649874399804313, "learning_rate": 9.536147237365895e-06, "loss": 0.2226, "step": 1803 }, { "epoch": 0.49, "grad_norm": 3.3235678329701828, "learning_rate": 9.53552715701751e-06, "loss": 0.2343, "step": 1804 }, { "epoch": 0.49, "grad_norm": 2.721784115519037, "learning_rate": 9.534906682670606e-06, "loss": 0.2197, "step": 1805 }, { "epoch": 0.49, "grad_norm": 2.5320710013883065, "learning_rate": 9.534285814379084e-06, "loss": 0.1815, "step": 1806 }, { "epoch": 0.49, "grad_norm": 2.845195811264802, "learning_rate": 9.533664552196875e-06, "loss": 0.2107, "step": 1807 }, { "epoch": 0.49, "grad_norm": 2.8196917814835873, "learning_rate": 9.533042896177951e-06, "loss": 0.2174, "step": 1808 }, { "epoch": 0.49, "grad_norm": 3.2058179430262976, "learning_rate": 9.532420846376316e-06, "loss": 0.2779, "step": 1809 }, { "epoch": 0.49, "grad_norm": 2.8220935091568498, "learning_rate": 9.531798402846004e-06, "loss": 0.2182, "step": 1810 }, { "epoch": 0.49, "grad_norm": 2.9622485496711426, "learning_rate": 9.531175565641087e-06, "loss": 0.2485, "step": 1811 }, { "epoch": 0.49, "grad_norm": 2.742569359962187, "learning_rate": 9.530552334815672e-06, "loss": 0.2205, "step": 1812 }, { "epoch": 0.49, "grad_norm": 2.560874594311351, "learning_rate": 9.529928710423897e-06, "loss": 0.2021, "step": 1813 }, { "epoch": 0.5, "grad_norm": 2.8660690670715376, "learning_rate": 9.529304692519936e-06, "loss": 0.2197, "step": 1814 }, { "epoch": 0.5, "grad_norm": 2.653512013513791, "learning_rate": 9.528680281157999e-06, "loss": 0.1918, "step": 1815 }, { "epoch": 0.5, "grad_norm": 4.358691075439933, "learning_rate": 9.528055476392325e-06, "loss": 0.2312, "step": 1816 }, { "epoch": 0.5, "grad_norm": 2.930750185807348, "learning_rate": 9.527430278277194e-06, "loss": 0.1845, "step": 1817 }, { "epoch": 0.5, "grad_norm": 2.5720787870654163, "learning_rate": 9.526804686866916e-06, "loss": 0.1811, "step": 1818 }, { "epoch": 0.5, "grad_norm": 2.5755857865455787, "learning_rate": 9.526178702215833e-06, "loss": 0.221, "step": 1819 }, { "epoch": 0.5, "grad_norm": 2.974370888850911, "learning_rate": 9.525552324378324e-06, "loss": 0.2462, "step": 1820 }, { "epoch": 0.5, "grad_norm": 2.656199892069847, "learning_rate": 9.524925553408806e-06, "loss": 0.2142, "step": 1821 }, { "epoch": 0.5, "grad_norm": 2.4448411260276273, "learning_rate": 9.524298389361724e-06, "loss": 0.193, "step": 1822 }, { "epoch": 0.5, "grad_norm": 3.247907757639739, "learning_rate": 9.523670832291556e-06, "loss": 0.2239, "step": 1823 }, { "epoch": 0.5, "grad_norm": 2.6984256328987652, "learning_rate": 9.523042882252825e-06, "loss": 0.224, "step": 1824 }, { "epoch": 0.5, "grad_norm": 2.664496157380522, "learning_rate": 9.522414539300074e-06, "loss": 0.2161, "step": 1825 }, { "epoch": 0.5, "grad_norm": 3.168184124135893, "learning_rate": 9.521785803487888e-06, "loss": 0.2655, "step": 1826 }, { "epoch": 0.5, "grad_norm": 2.6135886449829258, "learning_rate": 9.521156674870888e-06, "loss": 0.2011, "step": 1827 }, { "epoch": 0.5, "grad_norm": 3.4198571718264494, "learning_rate": 9.520527153503722e-06, "loss": 0.275, "step": 1828 }, { "epoch": 0.5, "grad_norm": 2.5649898725678835, "learning_rate": 9.51989723944108e-06, "loss": 0.2152, "step": 1829 }, { "epoch": 0.5, "grad_norm": 2.5715593733642272, "learning_rate": 9.51926693273768e-06, "loss": 0.2343, "step": 1830 }, { "epoch": 0.5, "grad_norm": 3.05875050110746, "learning_rate": 9.518636233448276e-06, "loss": 0.1952, "step": 1831 }, { "epoch": 0.5, "grad_norm": 2.9192309259881792, "learning_rate": 9.518005141627659e-06, "loss": 0.1911, "step": 1832 }, { "epoch": 0.5, "grad_norm": 2.7598320923834168, "learning_rate": 9.517373657330648e-06, "loss": 0.2255, "step": 1833 }, { "epoch": 0.5, "grad_norm": 3.3713356821438447, "learning_rate": 9.516741780612102e-06, "loss": 0.2256, "step": 1834 }, { "epoch": 0.5, "grad_norm": 3.0775933021261324, "learning_rate": 9.516109511526912e-06, "loss": 0.2287, "step": 1835 }, { "epoch": 0.5, "grad_norm": 2.92150254626363, "learning_rate": 9.515476850130001e-06, "loss": 0.2179, "step": 1836 }, { "epoch": 0.5, "grad_norm": 3.1386410386127683, "learning_rate": 9.514843796476329e-06, "loss": 0.2004, "step": 1837 }, { "epoch": 0.5, "grad_norm": 2.7784503953436968, "learning_rate": 9.51421035062089e-06, "loss": 0.2161, "step": 1838 }, { "epoch": 0.5, "grad_norm": 2.6419659813585032, "learning_rate": 9.51357651261871e-06, "loss": 0.2222, "step": 1839 }, { "epoch": 0.5, "grad_norm": 3.4760906899052904, "learning_rate": 9.512942282524848e-06, "loss": 0.2457, "step": 1840 }, { "epoch": 0.5, "grad_norm": 2.5684668295644104, "learning_rate": 9.512307660394404e-06, "loss": 0.2055, "step": 1841 }, { "epoch": 0.5, "grad_norm": 2.964621561473322, "learning_rate": 9.511672646282502e-06, "loss": 0.204, "step": 1842 }, { "epoch": 0.5, "grad_norm": 3.0596750014645933, "learning_rate": 9.51103724024431e-06, "loss": 0.2181, "step": 1843 }, { "epoch": 0.5, "grad_norm": 2.5677103627021687, "learning_rate": 9.510401442335022e-06, "loss": 0.2149, "step": 1844 }, { "epoch": 0.5, "grad_norm": 2.7876347799956704, "learning_rate": 9.509765252609873e-06, "loss": 0.2536, "step": 1845 }, { "epoch": 0.5, "grad_norm": 2.8113449582417394, "learning_rate": 9.509128671124123e-06, "loss": 0.204, "step": 1846 }, { "epoch": 0.5, "grad_norm": 2.3997174306890647, "learning_rate": 9.508491697933076e-06, "loss": 0.201, "step": 1847 }, { "epoch": 0.5, "grad_norm": 2.6722596209316203, "learning_rate": 9.507854333092064e-06, "loss": 0.1983, "step": 1848 }, { "epoch": 0.5, "grad_norm": 2.8171963877872015, "learning_rate": 9.507216576656454e-06, "loss": 0.2501, "step": 1849 }, { "epoch": 0.51, "grad_norm": 3.136077220446606, "learning_rate": 9.506578428681648e-06, "loss": 0.224, "step": 1850 }, { "epoch": 0.51, "grad_norm": 2.6603871708849227, "learning_rate": 9.50593988922308e-06, "loss": 0.2223, "step": 1851 }, { "epoch": 0.51, "grad_norm": 2.478401052836361, "learning_rate": 9.505300958336224e-06, "loss": 0.2123, "step": 1852 }, { "epoch": 0.51, "grad_norm": 2.7983938569349647, "learning_rate": 9.50466163607658e-06, "loss": 0.2426, "step": 1853 }, { "epoch": 0.51, "grad_norm": 3.1826528677294137, "learning_rate": 9.504021922499685e-06, "loss": 0.2295, "step": 1854 }, { "epoch": 0.51, "grad_norm": 2.4609413388220713, "learning_rate": 9.503381817661113e-06, "loss": 0.1714, "step": 1855 }, { "epoch": 0.51, "grad_norm": 2.4388144063481425, "learning_rate": 9.502741321616467e-06, "loss": 0.1852, "step": 1856 }, { "epoch": 0.51, "grad_norm": 2.5701719511300603, "learning_rate": 9.502100434421388e-06, "loss": 0.2182, "step": 1857 }, { "epoch": 0.51, "grad_norm": 2.630080586035678, "learning_rate": 9.501459156131549e-06, "loss": 0.1901, "step": 1858 }, { "epoch": 0.51, "grad_norm": 2.5367991126218006, "learning_rate": 9.500817486802658e-06, "loss": 0.2091, "step": 1859 }, { "epoch": 0.51, "grad_norm": 2.977441082365511, "learning_rate": 9.500175426490455e-06, "loss": 0.2697, "step": 1860 }, { "epoch": 0.51, "grad_norm": 2.694120040608933, "learning_rate": 9.499532975250719e-06, "loss": 0.2597, "step": 1861 }, { "epoch": 0.51, "grad_norm": 2.8699728652688603, "learning_rate": 9.498890133139253e-06, "loss": 0.2311, "step": 1862 }, { "epoch": 0.51, "grad_norm": 2.592795339691541, "learning_rate": 9.498246900211908e-06, "loss": 0.2263, "step": 1863 }, { "epoch": 0.51, "grad_norm": 2.8263425890119076, "learning_rate": 9.497603276524555e-06, "loss": 0.2416, "step": 1864 }, { "epoch": 0.51, "grad_norm": 3.0721887535385144, "learning_rate": 9.496959262133108e-06, "loss": 0.2283, "step": 1865 }, { "epoch": 0.51, "grad_norm": 2.4867602124739987, "learning_rate": 9.49631485709351e-06, "loss": 0.2151, "step": 1866 }, { "epoch": 0.51, "grad_norm": 2.5663160110048344, "learning_rate": 9.495670061461747e-06, "loss": 0.1792, "step": 1867 }, { "epoch": 0.51, "grad_norm": 2.4583693021586006, "learning_rate": 9.49502487529382e-06, "loss": 0.1901, "step": 1868 }, { "epoch": 0.51, "grad_norm": 2.450647294077089, "learning_rate": 9.494379298645788e-06, "loss": 0.215, "step": 1869 }, { "epoch": 0.51, "grad_norm": 3.1213740419409457, "learning_rate": 9.493733331573724e-06, "loss": 0.2618, "step": 1870 }, { "epoch": 0.51, "grad_norm": 2.919033858671396, "learning_rate": 9.493086974133747e-06, "loss": 0.2365, "step": 1871 }, { "epoch": 0.51, "grad_norm": 3.1208642135231974, "learning_rate": 9.492440226382003e-06, "loss": 0.2599, "step": 1872 }, { "epoch": 0.51, "grad_norm": 2.9299860824809874, "learning_rate": 9.491793088374676e-06, "loss": 0.2265, "step": 1873 }, { "epoch": 0.51, "grad_norm": 2.5396018602628585, "learning_rate": 9.491145560167983e-06, "loss": 0.1909, "step": 1874 }, { "epoch": 0.51, "grad_norm": 2.733492147565226, "learning_rate": 9.490497641818172e-06, "loss": 0.2257, "step": 1875 }, { "epoch": 0.51, "grad_norm": 2.5504400398440796, "learning_rate": 9.489849333381529e-06, "loss": 0.2288, "step": 1876 }, { "epoch": 0.51, "grad_norm": 2.9391174795578725, "learning_rate": 9.489200634914373e-06, "loss": 0.2223, "step": 1877 }, { "epoch": 0.51, "grad_norm": 3.778446023649222, "learning_rate": 9.488551546473055e-06, "loss": 0.2553, "step": 1878 }, { "epoch": 0.51, "grad_norm": 2.973405428311063, "learning_rate": 9.48790206811396e-06, "loss": 0.2556, "step": 1879 }, { "epoch": 0.51, "grad_norm": 2.441651143798424, "learning_rate": 9.48725219989351e-06, "loss": 0.2079, "step": 1880 }, { "epoch": 0.51, "grad_norm": 2.694029475248282, "learning_rate": 9.486601941868155e-06, "loss": 0.1883, "step": 1881 }, { "epoch": 0.51, "grad_norm": 2.4020601837291995, "learning_rate": 9.485951294094386e-06, "loss": 0.2231, "step": 1882 }, { "epoch": 0.51, "grad_norm": 3.0210456620414887, "learning_rate": 9.485300256628725e-06, "loss": 0.2005, "step": 1883 }, { "epoch": 0.51, "grad_norm": 2.524550792070919, "learning_rate": 9.484648829527722e-06, "loss": 0.227, "step": 1884 }, { "epoch": 0.51, "grad_norm": 3.1144946771860025, "learning_rate": 9.483997012847971e-06, "loss": 0.2383, "step": 1885 }, { "epoch": 0.51, "grad_norm": 2.485604120410346, "learning_rate": 9.483344806646096e-06, "loss": 0.1896, "step": 1886 }, { "epoch": 0.52, "grad_norm": 2.4957293833751244, "learning_rate": 9.48269221097875e-06, "loss": 0.2012, "step": 1887 }, { "epoch": 0.52, "grad_norm": 3.1301637551102504, "learning_rate": 9.482039225902623e-06, "loss": 0.2419, "step": 1888 }, { "epoch": 0.52, "grad_norm": 2.2278398899690575, "learning_rate": 9.481385851474443e-06, "loss": 0.1573, "step": 1889 }, { "epoch": 0.52, "grad_norm": 2.8055241281176797, "learning_rate": 9.480732087750968e-06, "loss": 0.2213, "step": 1890 }, { "epoch": 0.52, "grad_norm": 2.4564968562722993, "learning_rate": 9.480077934788987e-06, "loss": 0.199, "step": 1891 }, { "epoch": 0.52, "grad_norm": 2.729906298416512, "learning_rate": 9.479423392645327e-06, "loss": 0.2137, "step": 1892 }, { "epoch": 0.52, "grad_norm": 2.3677899548039942, "learning_rate": 9.478768461376848e-06, "loss": 0.1871, "step": 1893 }, { "epoch": 0.52, "grad_norm": 2.6075451599195234, "learning_rate": 9.478113141040444e-06, "loss": 0.2289, "step": 1894 }, { "epoch": 0.52, "grad_norm": 2.483149257014688, "learning_rate": 9.477457431693043e-06, "loss": 0.1905, "step": 1895 }, { "epoch": 0.52, "grad_norm": 2.9003563154187777, "learning_rate": 9.476801333391604e-06, "loss": 0.2148, "step": 1896 }, { "epoch": 0.52, "grad_norm": 2.5408719970376263, "learning_rate": 9.476144846193124e-06, "loss": 0.2171, "step": 1897 }, { "epoch": 0.52, "grad_norm": 2.8334286283044765, "learning_rate": 9.475487970154628e-06, "loss": 0.2191, "step": 1898 }, { "epoch": 0.52, "grad_norm": 2.749922657204987, "learning_rate": 9.474830705333185e-06, "loss": 0.2214, "step": 1899 }, { "epoch": 0.52, "grad_norm": 2.4796788277965858, "learning_rate": 9.474173051785884e-06, "loss": 0.1978, "step": 1900 }, { "epoch": 0.52, "grad_norm": 2.8908276775330957, "learning_rate": 9.473515009569857e-06, "loss": 0.2654, "step": 1901 }, { "epoch": 0.52, "grad_norm": 2.568959426905677, "learning_rate": 9.472856578742273e-06, "loss": 0.2314, "step": 1902 }, { "epoch": 0.52, "grad_norm": 3.014301621071135, "learning_rate": 9.472197759360322e-06, "loss": 0.2192, "step": 1903 }, { "epoch": 0.52, "grad_norm": 2.851670442117005, "learning_rate": 9.47153855148124e-06, "loss": 0.2138, "step": 1904 }, { "epoch": 0.52, "grad_norm": 2.4907931690439624, "learning_rate": 9.470878955162291e-06, "loss": 0.2079, "step": 1905 }, { "epoch": 0.52, "grad_norm": 2.6247607760777676, "learning_rate": 9.470218970460771e-06, "loss": 0.2186, "step": 1906 }, { "epoch": 0.52, "grad_norm": 2.806282643848966, "learning_rate": 9.469558597434018e-06, "loss": 0.2217, "step": 1907 }, { "epoch": 0.52, "grad_norm": 2.6951927795887323, "learning_rate": 9.468897836139392e-06, "loss": 0.1841, "step": 1908 }, { "epoch": 0.52, "grad_norm": 2.7122144265711903, "learning_rate": 9.468236686634298e-06, "loss": 0.2002, "step": 1909 }, { "epoch": 0.52, "grad_norm": 2.8418876411726717, "learning_rate": 9.467575148976167e-06, "loss": 0.2201, "step": 1910 }, { "epoch": 0.52, "grad_norm": 2.7964638164990028, "learning_rate": 9.466913223222467e-06, "loss": 0.2007, "step": 1911 }, { "epoch": 0.52, "grad_norm": 2.714529518802107, "learning_rate": 9.4662509094307e-06, "loss": 0.2213, "step": 1912 }, { "epoch": 0.52, "grad_norm": 2.5755466153761963, "learning_rate": 9.465588207658398e-06, "loss": 0.2126, "step": 1913 }, { "epoch": 0.52, "grad_norm": 3.0776784341867045, "learning_rate": 9.464925117963133e-06, "loss": 0.2405, "step": 1914 }, { "epoch": 0.52, "grad_norm": 2.692709742743847, "learning_rate": 9.464261640402504e-06, "loss": 0.1977, "step": 1915 }, { "epoch": 0.52, "grad_norm": 2.61925345076181, "learning_rate": 9.46359777503415e-06, "loss": 0.1991, "step": 1916 }, { "epoch": 0.52, "grad_norm": 2.9490438106366628, "learning_rate": 9.462933521915738e-06, "loss": 0.259, "step": 1917 }, { "epoch": 0.52, "grad_norm": 2.8131085439040238, "learning_rate": 9.462268881104973e-06, "loss": 0.213, "step": 1918 }, { "epoch": 0.52, "grad_norm": 2.6930230106023165, "learning_rate": 9.461603852659592e-06, "loss": 0.2219, "step": 1919 }, { "epoch": 0.52, "grad_norm": 2.6248142589618286, "learning_rate": 9.460938436637363e-06, "loss": 0.1881, "step": 1920 }, { "epoch": 0.52, "grad_norm": 2.519783729717937, "learning_rate": 9.460272633096093e-06, "loss": 0.1977, "step": 1921 }, { "epoch": 0.52, "grad_norm": 2.44566254946816, "learning_rate": 9.45960644209362e-06, "loss": 0.1617, "step": 1922 }, { "epoch": 0.52, "grad_norm": 3.080743755461343, "learning_rate": 9.458939863687814e-06, "loss": 0.2351, "step": 1923 }, { "epoch": 0.53, "grad_norm": 2.372196875741983, "learning_rate": 9.45827289793658e-06, "loss": 0.1855, "step": 1924 }, { "epoch": 0.53, "grad_norm": 2.8978361298701696, "learning_rate": 9.45760554489786e-06, "loss": 0.2039, "step": 1925 }, { "epoch": 0.53, "grad_norm": 2.938487337888618, "learning_rate": 9.456937804629623e-06, "loss": 0.2349, "step": 1926 }, { "epoch": 0.53, "grad_norm": 2.5242025735422233, "learning_rate": 9.456269677189878e-06, "loss": 0.23, "step": 1927 }, { "epoch": 0.53, "grad_norm": 2.9640757522941916, "learning_rate": 9.455601162636662e-06, "loss": 0.2345, "step": 1928 }, { "epoch": 0.53, "grad_norm": 2.4592916506365734, "learning_rate": 9.454932261028052e-06, "loss": 0.1875, "step": 1929 }, { "epoch": 0.53, "grad_norm": 2.676826726363711, "learning_rate": 9.45426297242215e-06, "loss": 0.2357, "step": 1930 }, { "epoch": 0.53, "grad_norm": 2.603045167142856, "learning_rate": 9.4535932968771e-06, "loss": 0.2186, "step": 1931 }, { "epoch": 0.53, "grad_norm": 2.6179002499197743, "learning_rate": 9.45292323445108e-06, "loss": 0.2329, "step": 1932 }, { "epoch": 0.53, "grad_norm": 3.4951787850459533, "learning_rate": 9.452252785202291e-06, "loss": 0.2556, "step": 1933 }, { "epoch": 0.53, "grad_norm": 2.8161851093295622, "learning_rate": 9.451581949188979e-06, "loss": 0.2358, "step": 1934 }, { "epoch": 0.53, "grad_norm": 2.8604351151213536, "learning_rate": 9.450910726469415e-06, "loss": 0.2146, "step": 1935 }, { "epoch": 0.53, "grad_norm": 2.7247453215479185, "learning_rate": 9.450239117101913e-06, "loss": 0.2197, "step": 1936 }, { "epoch": 0.53, "grad_norm": 2.5932164082196785, "learning_rate": 9.449567121144812e-06, "loss": 0.1759, "step": 1937 }, { "epoch": 0.53, "grad_norm": 2.459127860060185, "learning_rate": 9.448894738656488e-06, "loss": 0.2054, "step": 1938 }, { "epoch": 0.53, "grad_norm": 2.5567594832179057, "learning_rate": 9.448221969695352e-06, "loss": 0.2053, "step": 1939 }, { "epoch": 0.53, "grad_norm": 3.063139519828835, "learning_rate": 9.447548814319844e-06, "loss": 0.2648, "step": 1940 }, { "epoch": 0.53, "grad_norm": 2.7667141965766424, "learning_rate": 9.446875272588444e-06, "loss": 0.2322, "step": 1941 }, { "epoch": 0.53, "grad_norm": 2.65012761158475, "learning_rate": 9.446201344559663e-06, "loss": 0.2388, "step": 1942 }, { "epoch": 0.53, "grad_norm": 2.485013286242795, "learning_rate": 9.445527030292038e-06, "loss": 0.1689, "step": 1943 }, { "epoch": 0.53, "grad_norm": 3.1674008356907786, "learning_rate": 9.444852329844154e-06, "loss": 0.1947, "step": 1944 }, { "epoch": 0.53, "grad_norm": 2.5041850599188082, "learning_rate": 9.444177243274619e-06, "loss": 0.2318, "step": 1945 }, { "epoch": 0.53, "grad_norm": 2.543262982507882, "learning_rate": 9.443501770642074e-06, "loss": 0.2224, "step": 1946 }, { "epoch": 0.53, "grad_norm": 2.552510002975641, "learning_rate": 9.442825912005203e-06, "loss": 0.1832, "step": 1947 }, { "epoch": 0.53, "grad_norm": 2.6483771953343695, "learning_rate": 9.442149667422712e-06, "loss": 0.2122, "step": 1948 }, { "epoch": 0.53, "grad_norm": 3.116171727223382, "learning_rate": 9.441473036953351e-06, "loss": 0.2657, "step": 1949 }, { "epoch": 0.53, "grad_norm": 2.4444632077571824, "learning_rate": 9.440796020655893e-06, "loss": 0.2112, "step": 1950 }, { "epoch": 0.53, "grad_norm": 2.8485890635983155, "learning_rate": 9.440118618589153e-06, "loss": 0.2249, "step": 1951 }, { "epoch": 0.53, "grad_norm": 2.857300519261187, "learning_rate": 9.439440830811978e-06, "loss": 0.2342, "step": 1952 }, { "epoch": 0.53, "grad_norm": 2.9023746750587613, "learning_rate": 9.438762657383244e-06, "loss": 0.2244, "step": 1953 }, { "epoch": 0.53, "grad_norm": 3.660431395455733, "learning_rate": 9.438084098361865e-06, "loss": 0.1791, "step": 1954 }, { "epoch": 0.53, "grad_norm": 3.1628614706220777, "learning_rate": 9.437405153806786e-06, "loss": 0.2179, "step": 1955 }, { "epoch": 0.53, "grad_norm": 2.6193031981195296, "learning_rate": 9.43672582377699e-06, "loss": 0.2186, "step": 1956 }, { "epoch": 0.53, "grad_norm": 2.637730547352104, "learning_rate": 9.436046108331485e-06, "loss": 0.1905, "step": 1957 }, { "epoch": 0.53, "grad_norm": 5.643787526420855, "learning_rate": 9.435366007529321e-06, "loss": 0.2487, "step": 1958 }, { "epoch": 0.53, "grad_norm": 2.806439694154534, "learning_rate": 9.434685521429576e-06, "loss": 0.256, "step": 1959 }, { "epoch": 0.54, "grad_norm": 2.426073384191156, "learning_rate": 9.434004650091364e-06, "loss": 0.2006, "step": 1960 }, { "epoch": 0.54, "grad_norm": 2.848982888429369, "learning_rate": 9.433323393573831e-06, "loss": 0.2241, "step": 1961 }, { "epoch": 0.54, "grad_norm": 2.4436292578971477, "learning_rate": 9.432641751936162e-06, "loss": 0.2039, "step": 1962 }, { "epoch": 0.54, "grad_norm": 2.690399285702892, "learning_rate": 9.431959725237565e-06, "loss": 0.2256, "step": 1963 }, { "epoch": 0.54, "grad_norm": 2.3640975529400783, "learning_rate": 9.43127731353729e-06, "loss": 0.1991, "step": 1964 }, { "epoch": 0.54, "grad_norm": 2.664381998689749, "learning_rate": 9.430594516894615e-06, "loss": 0.2355, "step": 1965 }, { "epoch": 0.54, "grad_norm": 2.927538974930774, "learning_rate": 9.42991133536886e-06, "loss": 0.1872, "step": 1966 }, { "epoch": 0.54, "grad_norm": 3.4192325030106843, "learning_rate": 9.429227769019366e-06, "loss": 0.1966, "step": 1967 }, { "epoch": 0.54, "grad_norm": 2.5772244231544703, "learning_rate": 9.428543817905518e-06, "loss": 0.1878, "step": 1968 }, { "epoch": 0.54, "grad_norm": 2.945273229593765, "learning_rate": 9.427859482086728e-06, "loss": 0.2205, "step": 1969 }, { "epoch": 0.54, "grad_norm": 2.4021082584549, "learning_rate": 9.427174761622447e-06, "loss": 0.178, "step": 1970 }, { "epoch": 0.54, "grad_norm": 2.723789379531242, "learning_rate": 9.426489656572151e-06, "loss": 0.224, "step": 1971 }, { "epoch": 0.54, "grad_norm": 2.4069067741902193, "learning_rate": 9.42580416699536e-06, "loss": 0.185, "step": 1972 }, { "epoch": 0.54, "grad_norm": 2.61808192969402, "learning_rate": 9.425118292951622e-06, "loss": 0.1783, "step": 1973 }, { "epoch": 0.54, "grad_norm": 2.6068471187754807, "learning_rate": 9.424432034500514e-06, "loss": 0.1997, "step": 1974 }, { "epoch": 0.54, "grad_norm": 2.505352164760058, "learning_rate": 9.423745391701656e-06, "loss": 0.1755, "step": 1975 }, { "epoch": 0.54, "grad_norm": 2.905321972183192, "learning_rate": 9.423058364614692e-06, "loss": 0.193, "step": 1976 }, { "epoch": 0.54, "grad_norm": 2.456636097164985, "learning_rate": 9.422370953299305e-06, "loss": 0.1936, "step": 1977 }, { "epoch": 0.54, "grad_norm": 2.651484055642835, "learning_rate": 9.42168315781521e-06, "loss": 0.2174, "step": 1978 }, { "epoch": 0.54, "grad_norm": 3.2236332192989554, "learning_rate": 9.420994978222156e-06, "loss": 0.2565, "step": 1979 }, { "epoch": 0.54, "grad_norm": 2.662959400608087, "learning_rate": 9.420306414579925e-06, "loss": 0.2546, "step": 1980 }, { "epoch": 0.54, "grad_norm": 2.7209935987260008, "learning_rate": 9.419617466948332e-06, "loss": 0.1898, "step": 1981 }, { "epoch": 0.54, "grad_norm": 2.608906130950204, "learning_rate": 9.418928135387224e-06, "loss": 0.1959, "step": 1982 }, { "epoch": 0.54, "grad_norm": 2.892016124423646, "learning_rate": 9.418238419956484e-06, "loss": 0.2413, "step": 1983 }, { "epoch": 0.54, "grad_norm": 2.5214807013393394, "learning_rate": 9.417548320716027e-06, "loss": 0.1986, "step": 1984 }, { "epoch": 0.54, "grad_norm": 2.6225224108105416, "learning_rate": 9.416857837725802e-06, "loss": 0.1945, "step": 1985 }, { "epoch": 0.54, "grad_norm": 3.6927849854121133, "learning_rate": 9.41616697104579e-06, "loss": 0.234, "step": 1986 }, { "epoch": 0.54, "grad_norm": 2.885252230827505, "learning_rate": 9.415475720736005e-06, "loss": 0.193, "step": 1987 }, { "epoch": 0.54, "grad_norm": 2.406536944160158, "learning_rate": 9.4147840868565e-06, "loss": 0.2084, "step": 1988 }, { "epoch": 0.54, "grad_norm": 2.3689466693689845, "learning_rate": 9.41409206946735e-06, "loss": 0.1754, "step": 1989 }, { "epoch": 0.54, "grad_norm": 2.732759605468346, "learning_rate": 9.413399668628678e-06, "loss": 0.234, "step": 1990 }, { "epoch": 0.54, "grad_norm": 2.8465027053557437, "learning_rate": 9.412706884400626e-06, "loss": 0.2314, "step": 1991 }, { "epoch": 0.54, "grad_norm": 2.6859551471287766, "learning_rate": 9.41201371684338e-06, "loss": 0.2069, "step": 1992 }, { "epoch": 0.54, "grad_norm": 2.4543680190490726, "learning_rate": 9.41132016601715e-06, "loss": 0.1891, "step": 1993 }, { "epoch": 0.54, "grad_norm": 2.543081842279785, "learning_rate": 9.41062623198219e-06, "loss": 0.2007, "step": 1994 }, { "epoch": 0.54, "grad_norm": 2.643504644548493, "learning_rate": 9.40993191479878e-06, "loss": 0.2005, "step": 1995 }, { "epoch": 0.54, "grad_norm": 2.6386626453698856, "learning_rate": 9.40923721452723e-06, "loss": 0.1965, "step": 1996 }, { "epoch": 0.55, "grad_norm": 2.7883578537046163, "learning_rate": 9.408542131227899e-06, "loss": 0.2405, "step": 1997 }, { "epoch": 0.55, "grad_norm": 2.5918221423216385, "learning_rate": 9.407846664961156e-06, "loss": 0.2374, "step": 1998 }, { "epoch": 0.55, "grad_norm": 2.6790357210419495, "learning_rate": 9.407150815787423e-06, "loss": 0.2215, "step": 1999 }, { "epoch": 0.55, "grad_norm": 2.672340650914591, "learning_rate": 9.406454583767148e-06, "loss": 0.1919, "step": 2000 }, { "epoch": 0.55, "grad_norm": 2.199263124042724, "learning_rate": 9.405757968960809e-06, "loss": 0.1702, "step": 2001 }, { "epoch": 0.55, "grad_norm": 2.826228979012623, "learning_rate": 9.405060971428924e-06, "loss": 0.1735, "step": 2002 }, { "epoch": 0.55, "grad_norm": 2.8056742959283962, "learning_rate": 9.404363591232038e-06, "loss": 0.2377, "step": 2003 }, { "epoch": 0.55, "grad_norm": 2.2464665235776438, "learning_rate": 9.403665828430732e-06, "loss": 0.1711, "step": 2004 }, { "epoch": 0.55, "grad_norm": 2.684128882037673, "learning_rate": 9.402967683085622e-06, "loss": 0.194, "step": 2005 }, { "epoch": 0.55, "grad_norm": 2.55592372971314, "learning_rate": 9.402269155257355e-06, "loss": 0.1856, "step": 2006 }, { "epoch": 0.55, "grad_norm": 2.810726533867702, "learning_rate": 9.401570245006612e-06, "loss": 0.1993, "step": 2007 }, { "epoch": 0.55, "grad_norm": 2.673834535407018, "learning_rate": 9.400870952394105e-06, "loss": 0.2109, "step": 2008 }, { "epoch": 0.55, "grad_norm": 2.554515647138085, "learning_rate": 9.400171277480583e-06, "loss": 0.2084, "step": 2009 }, { "epoch": 0.55, "grad_norm": 3.2341086820966525, "learning_rate": 9.399471220326827e-06, "loss": 0.2471, "step": 2010 }, { "epoch": 0.55, "grad_norm": 2.6214640591966494, "learning_rate": 9.39877078099365e-06, "loss": 0.2083, "step": 2011 }, { "epoch": 0.55, "grad_norm": 2.765259651089772, "learning_rate": 9.398069959541895e-06, "loss": 0.2277, "step": 2012 }, { "epoch": 0.55, "grad_norm": 2.426256706911028, "learning_rate": 9.397368756032445e-06, "loss": 0.1914, "step": 2013 }, { "epoch": 0.55, "grad_norm": 2.734865562586326, "learning_rate": 9.396667170526215e-06, "loss": 0.1872, "step": 2014 }, { "epoch": 0.55, "grad_norm": 2.5323656842840556, "learning_rate": 9.395965203084149e-06, "loss": 0.1916, "step": 2015 }, { "epoch": 0.55, "grad_norm": 2.691272539878832, "learning_rate": 9.39526285376723e-06, "loss": 0.1791, "step": 2016 }, { "epoch": 0.55, "grad_norm": 2.290756305497156, "learning_rate": 9.394560122636463e-06, "loss": 0.1892, "step": 2017 }, { "epoch": 0.55, "grad_norm": 2.695272436574397, "learning_rate": 9.3938570097529e-06, "loss": 0.2303, "step": 2018 }, { "epoch": 0.55, "grad_norm": 2.856504204295195, "learning_rate": 9.393153515177617e-06, "loss": 0.2034, "step": 2019 }, { "epoch": 0.55, "grad_norm": 2.6351374699877446, "learning_rate": 9.39244963897173e-06, "loss": 0.2342, "step": 2020 }, { "epoch": 0.55, "grad_norm": 2.5280491061253145, "learning_rate": 9.391745381196382e-06, "loss": 0.2085, "step": 2021 }, { "epoch": 0.55, "grad_norm": 2.218726658355057, "learning_rate": 9.39104074191275e-06, "loss": 0.1772, "step": 2022 }, { "epoch": 0.55, "grad_norm": 2.569107509479353, "learning_rate": 9.390335721182047e-06, "loss": 0.2057, "step": 2023 }, { "epoch": 0.55, "grad_norm": 2.425718166092449, "learning_rate": 9.389630319065518e-06, "loss": 0.2033, "step": 2024 }, { "epoch": 0.55, "grad_norm": 2.5682746905234297, "learning_rate": 9.38892453562444e-06, "loss": 0.1902, "step": 2025 }, { "epoch": 0.55, "grad_norm": 2.5803657481503133, "learning_rate": 9.388218370920126e-06, "loss": 0.1877, "step": 2026 }, { "epoch": 0.55, "grad_norm": 2.502568274778471, "learning_rate": 9.387511825013917e-06, "loss": 0.1952, "step": 2027 }, { "epoch": 0.55, "grad_norm": 2.367587850951012, "learning_rate": 9.386804897967192e-06, "loss": 0.215, "step": 2028 }, { "epoch": 0.55, "grad_norm": 2.483290930090042, "learning_rate": 9.386097589841362e-06, "loss": 0.192, "step": 2029 }, { "epoch": 0.55, "grad_norm": 2.9069360870111547, "learning_rate": 9.38538990069787e-06, "loss": 0.2559, "step": 2030 }, { "epoch": 0.55, "grad_norm": 2.4572354590028342, "learning_rate": 9.384681830598192e-06, "loss": 0.1854, "step": 2031 }, { "epoch": 0.55, "grad_norm": 2.4336479586620023, "learning_rate": 9.383973379603837e-06, "loss": 0.2043, "step": 2032 }, { "epoch": 0.56, "grad_norm": 2.936384170713211, "learning_rate": 9.383264547776348e-06, "loss": 0.215, "step": 2033 }, { "epoch": 0.56, "grad_norm": 3.0111136718837406, "learning_rate": 9.382555335177301e-06, "loss": 0.2232, "step": 2034 }, { "epoch": 0.56, "grad_norm": 2.4228679706758345, "learning_rate": 9.381845741868307e-06, "loss": 0.2205, "step": 2035 }, { "epoch": 0.56, "grad_norm": 2.350889180501579, "learning_rate": 9.381135767911005e-06, "loss": 0.1651, "step": 2036 }, { "epoch": 0.56, "grad_norm": 2.833836487250353, "learning_rate": 9.380425413367072e-06, "loss": 0.1983, "step": 2037 }, { "epoch": 0.56, "grad_norm": 3.0301445337781305, "learning_rate": 9.379714678298213e-06, "loss": 0.2363, "step": 2038 }, { "epoch": 0.56, "grad_norm": 2.6564261981617183, "learning_rate": 9.379003562766172e-06, "loss": 0.2102, "step": 2039 }, { "epoch": 0.56, "grad_norm": 2.605630163024274, "learning_rate": 9.378292066832723e-06, "loss": 0.2156, "step": 2040 }, { "epoch": 0.56, "grad_norm": 2.3876344503267166, "learning_rate": 9.377580190559674e-06, "loss": 0.1789, "step": 2041 }, { "epoch": 0.56, "grad_norm": 2.407973641103197, "learning_rate": 9.376867934008862e-06, "loss": 0.1637, "step": 2042 }, { "epoch": 0.56, "grad_norm": 2.255162426339039, "learning_rate": 9.376155297242163e-06, "loss": 0.1744, "step": 2043 }, { "epoch": 0.56, "grad_norm": 2.5777906248589373, "learning_rate": 9.375442280321483e-06, "loss": 0.2254, "step": 2044 }, { "epoch": 0.56, "grad_norm": 2.560905524212858, "learning_rate": 9.37472888330876e-06, "loss": 0.1884, "step": 2045 }, { "epoch": 0.56, "grad_norm": 2.3351811571020478, "learning_rate": 9.374015106265968e-06, "loss": 0.1899, "step": 2046 }, { "epoch": 0.56, "grad_norm": 2.667532213283011, "learning_rate": 9.373300949255112e-06, "loss": 0.2127, "step": 2047 }, { "epoch": 0.56, "grad_norm": 2.4224288583375313, "learning_rate": 9.372586412338228e-06, "loss": 0.2134, "step": 2048 }, { "epoch": 0.56, "grad_norm": 2.362493648754961, "learning_rate": 9.371871495577391e-06, "loss": 0.1995, "step": 2049 }, { "epoch": 0.56, "grad_norm": 2.930840530202458, "learning_rate": 9.371156199034703e-06, "loss": 0.2192, "step": 2050 }, { "epoch": 0.56, "grad_norm": 2.486308103735155, "learning_rate": 9.370440522772305e-06, "loss": 0.217, "step": 2051 }, { "epoch": 0.56, "grad_norm": 2.778524384283164, "learning_rate": 9.369724466852361e-06, "loss": 0.2159, "step": 2052 }, { "epoch": 0.56, "grad_norm": 2.483776678284865, "learning_rate": 9.36900803133708e-06, "loss": 0.2255, "step": 2053 }, { "epoch": 0.56, "grad_norm": 2.490734908559257, "learning_rate": 9.368291216288696e-06, "loss": 0.2032, "step": 2054 }, { "epoch": 0.56, "grad_norm": 2.624057152426544, "learning_rate": 9.367574021769477e-06, "loss": 0.2028, "step": 2055 }, { "epoch": 0.56, "grad_norm": 2.790844457952432, "learning_rate": 9.36685644784173e-06, "loss": 0.2242, "step": 2056 }, { "epoch": 0.56, "grad_norm": 2.5409723039215666, "learning_rate": 9.366138494567785e-06, "loss": 0.2074, "step": 2057 }, { "epoch": 0.56, "grad_norm": 2.5905844037475796, "learning_rate": 9.365420162010011e-06, "loss": 0.1891, "step": 2058 }, { "epoch": 0.56, "grad_norm": 2.5827029992598973, "learning_rate": 9.364701450230813e-06, "loss": 0.2509, "step": 2059 }, { "epoch": 0.56, "grad_norm": 2.5821468141160038, "learning_rate": 9.36398235929262e-06, "loss": 0.193, "step": 2060 }, { "epoch": 0.56, "grad_norm": 2.7476511990821915, "learning_rate": 9.363262889257902e-06, "loss": 0.2534, "step": 2061 }, { "epoch": 0.56, "grad_norm": 2.5992986961210045, "learning_rate": 9.36254304018916e-06, "loss": 0.205, "step": 2062 }, { "epoch": 0.56, "grad_norm": 2.5964621959352434, "learning_rate": 9.361822812148925e-06, "loss": 0.2411, "step": 2063 }, { "epoch": 0.56, "grad_norm": 2.348374146086673, "learning_rate": 9.361102205199762e-06, "loss": 0.1912, "step": 2064 }, { "epoch": 0.56, "grad_norm": 2.389593786680925, "learning_rate": 9.360381219404268e-06, "loss": 0.158, "step": 2065 }, { "epoch": 0.56, "grad_norm": 2.342012078792294, "learning_rate": 9.35965985482508e-06, "loss": 0.1978, "step": 2066 }, { "epoch": 0.56, "grad_norm": 2.44827652113443, "learning_rate": 9.35893811152486e-06, "loss": 0.1982, "step": 2067 }, { "epoch": 0.56, "grad_norm": 2.6125108406311552, "learning_rate": 9.358215989566304e-06, "loss": 0.2215, "step": 2068 }, { "epoch": 0.56, "grad_norm": 2.6864063546168135, "learning_rate": 9.357493489012147e-06, "loss": 0.2506, "step": 2069 }, { "epoch": 0.57, "grad_norm": 2.5296836077942304, "learning_rate": 9.356770609925143e-06, "loss": 0.1973, "step": 2070 }, { "epoch": 0.57, "grad_norm": 3.1646610855312853, "learning_rate": 9.356047352368096e-06, "loss": 0.2246, "step": 2071 }, { "epoch": 0.57, "grad_norm": 2.586692582512955, "learning_rate": 9.355323716403834e-06, "loss": 0.1868, "step": 2072 }, { "epoch": 0.57, "grad_norm": 2.697741948964955, "learning_rate": 9.354599702095218e-06, "loss": 0.2085, "step": 2073 }, { "epoch": 0.57, "grad_norm": 2.5813862809412713, "learning_rate": 9.353875309505141e-06, "loss": 0.201, "step": 2074 }, { "epoch": 0.57, "grad_norm": 2.6241177565790523, "learning_rate": 9.353150538696531e-06, "loss": 0.2017, "step": 2075 }, { "epoch": 0.57, "grad_norm": 2.3519312083482364, "learning_rate": 9.35242538973235e-06, "loss": 0.1834, "step": 2076 }, { "epoch": 0.57, "grad_norm": 2.443726068958758, "learning_rate": 9.351699862675589e-06, "loss": 0.1464, "step": 2077 }, { "epoch": 0.57, "grad_norm": 2.662134078970126, "learning_rate": 9.350973957589278e-06, "loss": 0.208, "step": 2078 }, { "epoch": 0.57, "grad_norm": 2.633001440059104, "learning_rate": 9.35024767453647e-06, "loss": 0.2104, "step": 2079 }, { "epoch": 0.57, "grad_norm": 2.504696823995845, "learning_rate": 9.349521013580262e-06, "loss": 0.2071, "step": 2080 }, { "epoch": 0.57, "grad_norm": 2.85331303646671, "learning_rate": 9.348793974783778e-06, "loss": 0.2379, "step": 2081 }, { "epoch": 0.57, "grad_norm": 2.5933256336894783, "learning_rate": 9.348066558210174e-06, "loss": 0.209, "step": 2082 }, { "epoch": 0.57, "grad_norm": 2.418385976699233, "learning_rate": 9.34733876392264e-06, "loss": 0.217, "step": 2083 }, { "epoch": 0.57, "grad_norm": 2.592403604363241, "learning_rate": 9.346610591984398e-06, "loss": 0.2376, "step": 2084 }, { "epoch": 0.57, "grad_norm": 2.8134062455861577, "learning_rate": 9.345882042458708e-06, "loss": 0.248, "step": 2085 }, { "epoch": 0.57, "grad_norm": 2.8066052617759984, "learning_rate": 9.345153115408854e-06, "loss": 0.2222, "step": 2086 }, { "epoch": 0.57, "grad_norm": 2.526065099128459, "learning_rate": 9.34442381089816e-06, "loss": 0.2034, "step": 2087 }, { "epoch": 0.57, "grad_norm": 2.7833584958751167, "learning_rate": 9.343694128989979e-06, "loss": 0.233, "step": 2088 }, { "epoch": 0.57, "grad_norm": 2.7512879606674825, "learning_rate": 9.3429640697477e-06, "loss": 0.2513, "step": 2089 }, { "epoch": 0.57, "grad_norm": 2.3346064071511567, "learning_rate": 9.34223363323474e-06, "loss": 0.1983, "step": 2090 }, { "epoch": 0.57, "grad_norm": 2.4612795853630147, "learning_rate": 9.341502819514555e-06, "loss": 0.2146, "step": 2091 }, { "epoch": 0.57, "grad_norm": 2.5919535815213033, "learning_rate": 9.340771628650628e-06, "loss": 0.2189, "step": 2092 }, { "epoch": 0.57, "grad_norm": 2.845928305763761, "learning_rate": 9.340040060706477e-06, "loss": 0.2308, "step": 2093 }, { "epoch": 0.57, "grad_norm": 2.7728025885276697, "learning_rate": 9.339308115745654e-06, "loss": 0.2416, "step": 2094 }, { "epoch": 0.57, "grad_norm": 2.756575008360787, "learning_rate": 9.338575793831742e-06, "loss": 0.197, "step": 2095 }, { "epoch": 0.57, "grad_norm": 2.5862909447618496, "learning_rate": 9.337843095028357e-06, "loss": 0.2095, "step": 2096 }, { "epoch": 0.57, "grad_norm": 2.49508953339067, "learning_rate": 9.33711001939915e-06, "loss": 0.1963, "step": 2097 }, { "epoch": 0.57, "grad_norm": 2.2600581822721124, "learning_rate": 9.336376567007799e-06, "loss": 0.1723, "step": 2098 }, { "epoch": 0.57, "grad_norm": 2.5511537118576566, "learning_rate": 9.335642737918023e-06, "loss": 0.225, "step": 2099 }, { "epoch": 0.57, "grad_norm": 2.3327355427776744, "learning_rate": 9.334908532193567e-06, "loss": 0.1715, "step": 2100 }, { "epoch": 0.57, "grad_norm": 2.920818904690184, "learning_rate": 9.334173949898211e-06, "loss": 0.2482, "step": 2101 }, { "epoch": 0.57, "grad_norm": 2.7062295513631067, "learning_rate": 9.333438991095767e-06, "loss": 0.2147, "step": 2102 }, { "epoch": 0.57, "grad_norm": 2.3807795559711673, "learning_rate": 9.332703655850082e-06, "loss": 0.2088, "step": 2103 }, { "epoch": 0.57, "grad_norm": 2.3289614045043945, "learning_rate": 9.331967944225034e-06, "loss": 0.1775, "step": 2104 }, { "epoch": 0.57, "grad_norm": 3.1916492647709345, "learning_rate": 9.331231856284532e-06, "loss": 0.2639, "step": 2105 }, { "epoch": 0.57, "grad_norm": 3.1850514504843503, "learning_rate": 9.330495392092525e-06, "loss": 0.2554, "step": 2106 }, { "epoch": 0.58, "grad_norm": 2.567207117696942, "learning_rate": 9.32975855171298e-06, "loss": 0.1999, "step": 2107 }, { "epoch": 0.58, "grad_norm": 2.7436969616040896, "learning_rate": 9.329021335209913e-06, "loss": 0.2368, "step": 2108 }, { "epoch": 0.58, "grad_norm": 2.4937594081460155, "learning_rate": 9.328283742647365e-06, "loss": 0.1952, "step": 2109 }, { "epoch": 0.58, "grad_norm": 2.565330792675296, "learning_rate": 9.327545774089407e-06, "loss": 0.2215, "step": 2110 }, { "epoch": 0.58, "grad_norm": 3.0092278156350196, "learning_rate": 9.326807429600148e-06, "loss": 0.2218, "step": 2111 }, { "epoch": 0.58, "grad_norm": 2.6764990407114406, "learning_rate": 9.326068709243727e-06, "loss": 0.209, "step": 2112 }, { "epoch": 0.58, "grad_norm": 2.657926874049776, "learning_rate": 9.325329613084317e-06, "loss": 0.2355, "step": 2113 }, { "epoch": 0.58, "grad_norm": 2.6904005041241925, "learning_rate": 9.324590141186123e-06, "loss": 0.2072, "step": 2114 }, { "epoch": 0.58, "grad_norm": 2.6118869426331783, "learning_rate": 9.32385029361338e-06, "loss": 0.2122, "step": 2115 }, { "epoch": 0.58, "grad_norm": 2.451889794297275, "learning_rate": 9.32311007043036e-06, "loss": 0.2131, "step": 2116 }, { "epoch": 0.58, "grad_norm": 2.4043279314029027, "learning_rate": 9.322369471701367e-06, "loss": 0.1735, "step": 2117 }, { "epoch": 0.58, "grad_norm": 2.413460952984375, "learning_rate": 9.321628497490733e-06, "loss": 0.1775, "step": 2118 }, { "epoch": 0.58, "grad_norm": 2.8278178212703065, "learning_rate": 9.32088714786283e-06, "loss": 0.2253, "step": 2119 }, { "epoch": 0.58, "grad_norm": 2.6485185472846444, "learning_rate": 9.320145422882055e-06, "loss": 0.2233, "step": 2120 }, { "epoch": 0.58, "grad_norm": 2.5247783694907433, "learning_rate": 9.319403322612843e-06, "loss": 0.2247, "step": 2121 }, { "epoch": 0.58, "grad_norm": 2.220936374091986, "learning_rate": 9.31866084711966e-06, "loss": 0.1679, "step": 2122 }, { "epoch": 0.58, "grad_norm": 2.6856875359969132, "learning_rate": 9.317917996467004e-06, "loss": 0.2068, "step": 2123 }, { "epoch": 0.58, "grad_norm": 2.448446404171035, "learning_rate": 9.317174770719404e-06, "loss": 0.2104, "step": 2124 }, { "epoch": 0.58, "grad_norm": 2.6013910956835167, "learning_rate": 9.316431169941427e-06, "loss": 0.2031, "step": 2125 }, { "epoch": 0.58, "grad_norm": 2.5404512092092766, "learning_rate": 9.315687194197667e-06, "loss": 0.1859, "step": 2126 }, { "epoch": 0.58, "grad_norm": 2.536744435897603, "learning_rate": 9.314942843552754e-06, "loss": 0.196, "step": 2127 }, { "epoch": 0.58, "grad_norm": 2.329118337890104, "learning_rate": 9.314198118071349e-06, "loss": 0.1949, "step": 2128 }, { "epoch": 0.58, "grad_norm": 2.3806052355601723, "learning_rate": 9.313453017818144e-06, "loss": 0.203, "step": 2129 }, { "epoch": 0.58, "grad_norm": 3.1550408684846882, "learning_rate": 9.312707542857868e-06, "loss": 0.1966, "step": 2130 }, { "epoch": 0.58, "grad_norm": 2.510214207833926, "learning_rate": 9.311961693255281e-06, "loss": 0.2383, "step": 2131 }, { "epoch": 0.58, "grad_norm": 2.4929614422526685, "learning_rate": 9.311215469075168e-06, "loss": 0.2009, "step": 2132 }, { "epoch": 0.58, "grad_norm": 2.460049342735931, "learning_rate": 9.310468870382362e-06, "loss": 0.176, "step": 2133 }, { "epoch": 0.58, "grad_norm": 2.20905485079759, "learning_rate": 9.309721897241712e-06, "loss": 0.1701, "step": 2134 }, { "epoch": 0.58, "grad_norm": 2.6165381946184447, "learning_rate": 9.30897454971811e-06, "loss": 0.2305, "step": 2135 }, { "epoch": 0.58, "grad_norm": 2.393248267509264, "learning_rate": 9.308226827876478e-06, "loss": 0.2181, "step": 2136 }, { "epoch": 0.58, "grad_norm": 2.7512517490500645, "learning_rate": 9.307478731781772e-06, "loss": 0.2472, "step": 2137 }, { "epoch": 0.58, "grad_norm": 2.2858045216344975, "learning_rate": 9.306730261498973e-06, "loss": 0.2112, "step": 2138 }, { "epoch": 0.58, "grad_norm": 2.5539745999920105, "learning_rate": 9.305981417093106e-06, "loss": 0.2101, "step": 2139 }, { "epoch": 0.58, "grad_norm": 2.4273099077392057, "learning_rate": 9.30523219862922e-06, "loss": 0.2299, "step": 2140 }, { "epoch": 0.58, "grad_norm": 2.5049969085942556, "learning_rate": 9.304482606172401e-06, "loss": 0.2145, "step": 2141 }, { "epoch": 0.58, "grad_norm": 2.699400857466148, "learning_rate": 9.303732639787761e-06, "loss": 0.2082, "step": 2142 }, { "epoch": 0.59, "grad_norm": 2.7153707592171217, "learning_rate": 9.302982299540455e-06, "loss": 0.2369, "step": 2143 }, { "epoch": 0.59, "grad_norm": 2.3292310329969435, "learning_rate": 9.30223158549566e-06, "loss": 0.1644, "step": 2144 }, { "epoch": 0.59, "grad_norm": 2.5661604655312886, "learning_rate": 9.301480497718594e-06, "loss": 0.2138, "step": 2145 }, { "epoch": 0.59, "grad_norm": 2.684056971993484, "learning_rate": 9.300729036274501e-06, "loss": 0.1889, "step": 2146 }, { "epoch": 0.59, "grad_norm": 2.7374630586178834, "learning_rate": 9.29997720122866e-06, "loss": 0.1848, "step": 2147 }, { "epoch": 0.59, "grad_norm": 2.6696952199659463, "learning_rate": 9.299224992646383e-06, "loss": 0.2356, "step": 2148 }, { "epoch": 0.59, "grad_norm": 2.192027484973054, "learning_rate": 9.298472410593013e-06, "loss": 0.1835, "step": 2149 }, { "epoch": 0.59, "grad_norm": 2.2341993225528602, "learning_rate": 9.29771945513393e-06, "loss": 0.1701, "step": 2150 }, { "epoch": 0.59, "grad_norm": 2.7376584481437916, "learning_rate": 9.296966126334538e-06, "loss": 0.2127, "step": 2151 }, { "epoch": 0.59, "grad_norm": 2.4134134284880453, "learning_rate": 9.29621242426028e-06, "loss": 0.2281, "step": 2152 }, { "epoch": 0.59, "grad_norm": 2.3872216621139684, "learning_rate": 9.295458348976632e-06, "loss": 0.1927, "step": 2153 }, { "epoch": 0.59, "grad_norm": 2.3646011573688273, "learning_rate": 9.294703900549096e-06, "loss": 0.1949, "step": 2154 }, { "epoch": 0.59, "grad_norm": 2.6320640584006716, "learning_rate": 9.293949079043212e-06, "loss": 0.2274, "step": 2155 }, { "epoch": 0.59, "grad_norm": 2.8060272356137985, "learning_rate": 9.293193884524554e-06, "loss": 0.2024, "step": 2156 }, { "epoch": 0.59, "grad_norm": 2.521337567867084, "learning_rate": 9.29243831705872e-06, "loss": 0.219, "step": 2157 }, { "epoch": 0.59, "grad_norm": 2.569372889537845, "learning_rate": 9.29168237671135e-06, "loss": 0.1965, "step": 2158 }, { "epoch": 0.59, "grad_norm": 2.9235388025968154, "learning_rate": 9.290926063548109e-06, "loss": 0.2257, "step": 2159 }, { "epoch": 0.59, "grad_norm": 2.3844047758467566, "learning_rate": 9.2901693776347e-06, "loss": 0.2107, "step": 2160 }, { "epoch": 0.59, "grad_norm": 2.4737124488256255, "learning_rate": 9.289412319036854e-06, "loss": 0.1858, "step": 2161 }, { "epoch": 0.59, "grad_norm": 2.1918016217053644, "learning_rate": 9.288654887820337e-06, "loss": 0.2093, "step": 2162 }, { "epoch": 0.59, "grad_norm": 2.60183909879754, "learning_rate": 9.287897084050947e-06, "loss": 0.2014, "step": 2163 }, { "epoch": 0.59, "grad_norm": 2.2452810178972658, "learning_rate": 9.287138907794514e-06, "loss": 0.2155, "step": 2164 }, { "epoch": 0.59, "grad_norm": 2.2047539569481898, "learning_rate": 9.2863803591169e-06, "loss": 0.1951, "step": 2165 }, { "epoch": 0.59, "grad_norm": 2.5579799380737205, "learning_rate": 9.285621438083997e-06, "loss": 0.2227, "step": 2166 }, { "epoch": 0.59, "grad_norm": 2.566045913231194, "learning_rate": 9.284862144761736e-06, "loss": 0.2266, "step": 2167 }, { "epoch": 0.59, "grad_norm": 2.288937732848183, "learning_rate": 9.284102479216076e-06, "loss": 0.1782, "step": 2168 }, { "epoch": 0.59, "grad_norm": 2.5182564029899623, "learning_rate": 9.283342441513008e-06, "loss": 0.2119, "step": 2169 }, { "epoch": 0.59, "grad_norm": 2.650736729461245, "learning_rate": 9.282582031718554e-06, "loss": 0.2321, "step": 2170 }, { "epoch": 0.59, "grad_norm": 2.4123694904963933, "learning_rate": 9.281821249898772e-06, "loss": 0.1949, "step": 2171 }, { "epoch": 0.59, "grad_norm": 2.521404272246224, "learning_rate": 9.281060096119751e-06, "loss": 0.1934, "step": 2172 }, { "epoch": 0.59, "grad_norm": 2.7190636854264847, "learning_rate": 9.280298570447612e-06, "loss": 0.2518, "step": 2173 }, { "epoch": 0.59, "grad_norm": 2.8348195168176717, "learning_rate": 9.279536672948508e-06, "loss": 0.237, "step": 2174 }, { "epoch": 0.59, "grad_norm": 2.568239341563141, "learning_rate": 9.278774403688624e-06, "loss": 0.1879, "step": 2175 }, { "epoch": 0.59, "grad_norm": 2.4247706948958436, "learning_rate": 9.278011762734179e-06, "loss": 0.1856, "step": 2176 }, { "epoch": 0.59, "grad_norm": 2.5458975973234548, "learning_rate": 9.277248750151419e-06, "loss": 0.2187, "step": 2177 }, { "epoch": 0.59, "grad_norm": 2.8079111609954728, "learning_rate": 9.276485366006634e-06, "loss": 0.2132, "step": 2178 }, { "epoch": 0.59, "grad_norm": 2.6980350408333362, "learning_rate": 9.275721610366134e-06, "loss": 0.2008, "step": 2179 }, { "epoch": 0.6, "grad_norm": 2.449053889216658, "learning_rate": 9.274957483296263e-06, "loss": 0.1802, "step": 2180 }, { "epoch": 0.6, "grad_norm": 2.420252226896554, "learning_rate": 9.274192984863409e-06, "loss": 0.1927, "step": 2181 }, { "epoch": 0.6, "grad_norm": 2.720295193423951, "learning_rate": 9.273428115133975e-06, "loss": 0.217, "step": 2182 }, { "epoch": 0.6, "grad_norm": 2.5627759383174817, "learning_rate": 9.27266287417441e-06, "loss": 0.1894, "step": 2183 }, { "epoch": 0.6, "grad_norm": 2.391417106873159, "learning_rate": 9.271897262051186e-06, "loss": 0.2124, "step": 2184 }, { "epoch": 0.6, "grad_norm": 2.6370947993455442, "learning_rate": 9.271131278830815e-06, "loss": 0.19, "step": 2185 }, { "epoch": 0.6, "grad_norm": 2.3735696682383924, "learning_rate": 9.270364924579835e-06, "loss": 0.1989, "step": 2186 }, { "epoch": 0.6, "grad_norm": 2.4382396671086357, "learning_rate": 9.269598199364821e-06, "loss": 0.1997, "step": 2187 }, { "epoch": 0.6, "grad_norm": 2.4815437912767506, "learning_rate": 9.268831103252376e-06, "loss": 0.2058, "step": 2188 }, { "epoch": 0.6, "grad_norm": 2.6260524860068184, "learning_rate": 9.268063636309138e-06, "loss": 0.2074, "step": 2189 }, { "epoch": 0.6, "grad_norm": 2.6209845059794405, "learning_rate": 9.267295798601777e-06, "loss": 0.2057, "step": 2190 }, { "epoch": 0.6, "grad_norm": 2.373248069102358, "learning_rate": 9.266527590196992e-06, "loss": 0.1927, "step": 2191 }, { "epoch": 0.6, "grad_norm": 2.4207634492170342, "learning_rate": 9.265759011161519e-06, "loss": 0.2248, "step": 2192 }, { "epoch": 0.6, "grad_norm": 3.161131946062331, "learning_rate": 9.264990061562125e-06, "loss": 0.2892, "step": 2193 }, { "epoch": 0.6, "grad_norm": 2.7128839987119275, "learning_rate": 9.264220741465606e-06, "loss": 0.2507, "step": 2194 }, { "epoch": 0.6, "grad_norm": 2.9127508809489773, "learning_rate": 9.263451050938792e-06, "loss": 0.2079, "step": 2195 }, { "epoch": 0.6, "grad_norm": 2.602295486422228, "learning_rate": 9.262680990048549e-06, "loss": 0.2015, "step": 2196 }, { "epoch": 0.6, "grad_norm": 2.426353271983918, "learning_rate": 9.261910558861767e-06, "loss": 0.1721, "step": 2197 }, { "epoch": 0.6, "grad_norm": 2.6281971888874023, "learning_rate": 9.261139757445378e-06, "loss": 0.2446, "step": 2198 }, { "epoch": 0.6, "grad_norm": 2.6750678521031217, "learning_rate": 9.260368585866338e-06, "loss": 0.1887, "step": 2199 }, { "epoch": 0.6, "grad_norm": 2.439498290770255, "learning_rate": 9.259597044191635e-06, "loss": 0.1609, "step": 2200 }, { "epoch": 0.6, "grad_norm": 2.57565689154324, "learning_rate": 9.258825132488301e-06, "loss": 0.2046, "step": 2201 }, { "epoch": 0.6, "grad_norm": 2.469957936835051, "learning_rate": 9.258052850823383e-06, "loss": 0.1868, "step": 2202 }, { "epoch": 0.6, "grad_norm": 2.555981929385473, "learning_rate": 9.257280199263975e-06, "loss": 0.2084, "step": 2203 }, { "epoch": 0.6, "grad_norm": 2.6755242032001685, "learning_rate": 9.256507177877191e-06, "loss": 0.2144, "step": 2204 }, { "epoch": 0.6, "grad_norm": 2.9644514387571057, "learning_rate": 9.255733786730187e-06, "loss": 0.2233, "step": 2205 }, { "epoch": 0.6, "grad_norm": 2.4233369565054117, "learning_rate": 9.254960025890146e-06, "loss": 0.1705, "step": 2206 }, { "epoch": 0.6, "grad_norm": 2.786675936072335, "learning_rate": 9.254185895424284e-06, "loss": 0.2475, "step": 2207 }, { "epoch": 0.6, "grad_norm": 2.583731197981944, "learning_rate": 9.253411395399849e-06, "loss": 0.2112, "step": 2208 }, { "epoch": 0.6, "grad_norm": 2.555591835235902, "learning_rate": 9.25263652588412e-06, "loss": 0.2283, "step": 2209 }, { "epoch": 0.6, "grad_norm": 2.7282927973842215, "learning_rate": 9.251861286944415e-06, "loss": 0.183, "step": 2210 }, { "epoch": 0.6, "grad_norm": 2.60227318895484, "learning_rate": 9.251085678648072e-06, "loss": 0.2134, "step": 2211 }, { "epoch": 0.6, "grad_norm": 2.5177181433857774, "learning_rate": 9.25030970106247e-06, "loss": 0.2091, "step": 2212 }, { "epoch": 0.6, "grad_norm": 2.2608998775136233, "learning_rate": 9.249533354255019e-06, "loss": 0.1868, "step": 2213 }, { "epoch": 0.6, "grad_norm": 2.4753627699685024, "learning_rate": 9.248756638293156e-06, "loss": 0.2058, "step": 2214 }, { "epoch": 0.6, "grad_norm": 2.103874205400295, "learning_rate": 9.24797955324436e-06, "loss": 0.1633, "step": 2215 }, { "epoch": 0.6, "grad_norm": 2.3019823158672947, "learning_rate": 9.24720209917613e-06, "loss": 0.1926, "step": 2216 }, { "epoch": 0.61, "grad_norm": 2.4833454095303154, "learning_rate": 9.246424276156008e-06, "loss": 0.1709, "step": 2217 }, { "epoch": 0.61, "grad_norm": 2.6954470571054947, "learning_rate": 9.245646084251558e-06, "loss": 0.2511, "step": 2218 }, { "epoch": 0.61, "grad_norm": 2.5725226450068868, "learning_rate": 9.244867523530385e-06, "loss": 0.1748, "step": 2219 }, { "epoch": 0.61, "grad_norm": 2.4366714626084462, "learning_rate": 9.24408859406012e-06, "loss": 0.1892, "step": 2220 }, { "epoch": 0.61, "grad_norm": 2.9003770944177596, "learning_rate": 9.243309295908429e-06, "loss": 0.2223, "step": 2221 }, { "epoch": 0.61, "grad_norm": 2.373805078966787, "learning_rate": 9.24252962914301e-06, "loss": 0.1882, "step": 2222 }, { "epoch": 0.61, "grad_norm": 2.3643264728169773, "learning_rate": 9.241749593831588e-06, "loss": 0.2111, "step": 2223 }, { "epoch": 0.61, "grad_norm": 2.4892635326650145, "learning_rate": 9.24096919004193e-06, "loss": 0.2004, "step": 2224 }, { "epoch": 0.61, "grad_norm": 2.2260087973239893, "learning_rate": 9.240188417841824e-06, "loss": 0.1972, "step": 2225 }, { "epoch": 0.61, "grad_norm": 2.4923252320051366, "learning_rate": 9.239407277299101e-06, "loss": 0.1983, "step": 2226 }, { "epoch": 0.61, "grad_norm": 3.38887599003807, "learning_rate": 9.238625768481612e-06, "loss": 0.2028, "step": 2227 }, { "epoch": 0.61, "grad_norm": 2.702123916649951, "learning_rate": 9.23784389145725e-06, "loss": 0.2068, "step": 2228 }, { "epoch": 0.61, "grad_norm": 2.524056496186482, "learning_rate": 9.237061646293937e-06, "loss": 0.2283, "step": 2229 }, { "epoch": 0.61, "grad_norm": 2.6790704610160523, "learning_rate": 9.236279033059622e-06, "loss": 0.2152, "step": 2230 }, { "epoch": 0.61, "grad_norm": 2.531486803326059, "learning_rate": 9.235496051822293e-06, "loss": 0.1936, "step": 2231 }, { "epoch": 0.61, "grad_norm": 2.4294877192058992, "learning_rate": 9.234712702649969e-06, "loss": 0.2114, "step": 2232 }, { "epoch": 0.61, "grad_norm": 2.941051943504525, "learning_rate": 9.233928985610693e-06, "loss": 0.23, "step": 2233 }, { "epoch": 0.61, "grad_norm": 2.4661294728495418, "learning_rate": 9.233144900772553e-06, "loss": 0.1802, "step": 2234 }, { "epoch": 0.61, "grad_norm": 2.562296352742423, "learning_rate": 9.232360448203658e-06, "loss": 0.1911, "step": 2235 }, { "epoch": 0.61, "grad_norm": 3.3751514612177744, "learning_rate": 9.231575627972153e-06, "loss": 0.2145, "step": 2236 }, { "epoch": 0.61, "grad_norm": 2.6790040889535183, "learning_rate": 9.230790440146216e-06, "loss": 0.2122, "step": 2237 }, { "epoch": 0.61, "grad_norm": 2.6196786702959645, "learning_rate": 9.230004884794056e-06, "loss": 0.2186, "step": 2238 }, { "epoch": 0.61, "grad_norm": 2.507917127533199, "learning_rate": 9.229218961983913e-06, "loss": 0.1737, "step": 2239 }, { "epoch": 0.61, "grad_norm": 2.571893225282125, "learning_rate": 9.228432671784057e-06, "loss": 0.18, "step": 2240 }, { "epoch": 0.61, "grad_norm": 2.6517320268659414, "learning_rate": 9.227646014262799e-06, "loss": 0.2132, "step": 2241 }, { "epoch": 0.61, "grad_norm": 2.4226470358598933, "learning_rate": 9.22685898948847e-06, "loss": 0.1812, "step": 2242 }, { "epoch": 0.61, "grad_norm": 2.691860727993957, "learning_rate": 9.22607159752944e-06, "loss": 0.2385, "step": 2243 }, { "epoch": 0.61, "grad_norm": 2.610457567357634, "learning_rate": 9.225283838454111e-06, "loss": 0.2069, "step": 2244 }, { "epoch": 0.61, "grad_norm": 2.287272287222549, "learning_rate": 9.224495712330911e-06, "loss": 0.1833, "step": 2245 }, { "epoch": 0.61, "grad_norm": 2.6967522817328424, "learning_rate": 9.223707219228309e-06, "loss": 0.2606, "step": 2246 }, { "epoch": 0.61, "grad_norm": 2.5677464355327273, "learning_rate": 9.222918359214798e-06, "loss": 0.2088, "step": 2247 }, { "epoch": 0.61, "grad_norm": 2.228138654674976, "learning_rate": 9.222129132358905e-06, "loss": 0.1955, "step": 2248 }, { "epoch": 0.61, "grad_norm": 2.7524106732804516, "learning_rate": 9.221339538729191e-06, "loss": 0.2326, "step": 2249 }, { "epoch": 0.61, "grad_norm": 2.8469162027419754, "learning_rate": 9.220549578394249e-06, "loss": 0.2475, "step": 2250 }, { "epoch": 0.61, "grad_norm": 2.5654107410252474, "learning_rate": 9.2197592514227e-06, "loss": 0.1996, "step": 2251 }, { "epoch": 0.61, "grad_norm": 2.85828058348352, "learning_rate": 9.2189685578832e-06, "loss": 0.2241, "step": 2252 }, { "epoch": 0.62, "grad_norm": 4.6227627857354525, "learning_rate": 9.218177497844438e-06, "loss": 0.2305, "step": 2253 }, { "epoch": 0.62, "grad_norm": 2.489952855966967, "learning_rate": 9.217386071375129e-06, "loss": 0.2072, "step": 2254 }, { "epoch": 0.62, "grad_norm": 2.3319205365781035, "learning_rate": 9.216594278544026e-06, "loss": 0.1919, "step": 2255 }, { "epoch": 0.62, "grad_norm": 2.3848896374278543, "learning_rate": 9.215802119419912e-06, "loss": 0.2, "step": 2256 }, { "epoch": 0.62, "grad_norm": 2.7626309489641967, "learning_rate": 9.2150095940716e-06, "loss": 0.2021, "step": 2257 }, { "epoch": 0.62, "grad_norm": 2.3814894832398634, "learning_rate": 9.214216702567937e-06, "loss": 0.1596, "step": 2258 }, { "epoch": 0.62, "grad_norm": 2.4858932665283797, "learning_rate": 9.213423444977802e-06, "loss": 0.1887, "step": 2259 }, { "epoch": 0.62, "grad_norm": 2.8814946682273184, "learning_rate": 9.212629821370104e-06, "loss": 0.2191, "step": 2260 }, { "epoch": 0.62, "grad_norm": 2.3771866338389738, "learning_rate": 9.211835831813782e-06, "loss": 0.1945, "step": 2261 }, { "epoch": 0.62, "grad_norm": 2.5824248953859152, "learning_rate": 9.211041476377815e-06, "loss": 0.1795, "step": 2262 }, { "epoch": 0.62, "grad_norm": 2.6829547605660515, "learning_rate": 9.210246755131204e-06, "loss": 0.2167, "step": 2263 }, { "epoch": 0.62, "grad_norm": 2.8895884976101955, "learning_rate": 9.209451668142985e-06, "loss": 0.2107, "step": 2264 }, { "epoch": 0.62, "grad_norm": 2.3545112337814396, "learning_rate": 9.20865621548223e-06, "loss": 0.1627, "step": 2265 }, { "epoch": 0.62, "grad_norm": 2.643819032093641, "learning_rate": 9.20786039721804e-06, "loss": 0.2269, "step": 2266 }, { "epoch": 0.62, "grad_norm": 2.5771608728719277, "learning_rate": 9.207064213419543e-06, "loss": 0.1847, "step": 2267 }, { "epoch": 0.62, "grad_norm": 2.4209425767600563, "learning_rate": 9.206267664155906e-06, "loss": 0.2118, "step": 2268 }, { "epoch": 0.62, "grad_norm": 2.5796833818822664, "learning_rate": 9.205470749496326e-06, "loss": 0.226, "step": 2269 }, { "epoch": 0.62, "grad_norm": 2.842896162356135, "learning_rate": 9.204673469510025e-06, "loss": 0.1982, "step": 2270 }, { "epoch": 0.62, "grad_norm": 2.529868123364016, "learning_rate": 9.203875824266269e-06, "loss": 0.1942, "step": 2271 }, { "epoch": 0.62, "grad_norm": 2.302580492244617, "learning_rate": 9.203077813834345e-06, "loss": 0.1705, "step": 2272 }, { "epoch": 0.62, "grad_norm": 2.1416593207256684, "learning_rate": 9.202279438283577e-06, "loss": 0.1855, "step": 2273 }, { "epoch": 0.62, "grad_norm": 2.492684078471484, "learning_rate": 9.201480697683319e-06, "loss": 0.1913, "step": 2274 }, { "epoch": 0.62, "grad_norm": 2.6180141685541685, "learning_rate": 9.200681592102955e-06, "loss": 0.2137, "step": 2275 }, { "epoch": 0.62, "grad_norm": 2.630713575189787, "learning_rate": 9.199882121611907e-06, "loss": 0.2268, "step": 2276 }, { "epoch": 0.62, "grad_norm": 2.499227011095003, "learning_rate": 9.199082286279622e-06, "loss": 0.2072, "step": 2277 }, { "epoch": 0.62, "grad_norm": 2.2589567657768264, "learning_rate": 9.198282086175582e-06, "loss": 0.1974, "step": 2278 }, { "epoch": 0.62, "grad_norm": 2.30818219670286, "learning_rate": 9.197481521369299e-06, "loss": 0.1922, "step": 2279 }, { "epoch": 0.62, "grad_norm": 2.6818294975956243, "learning_rate": 9.196680591930318e-06, "loss": 0.1994, "step": 2280 }, { "epoch": 0.62, "grad_norm": 2.581348724363768, "learning_rate": 9.195879297928217e-06, "loss": 0.1948, "step": 2281 }, { "epoch": 0.62, "grad_norm": 2.373275487807223, "learning_rate": 9.195077639432599e-06, "loss": 0.181, "step": 2282 }, { "epoch": 0.62, "grad_norm": 2.789135756288639, "learning_rate": 9.19427561651311e-06, "loss": 0.2037, "step": 2283 }, { "epoch": 0.62, "grad_norm": 2.4823239168037237, "learning_rate": 9.193473229239417e-06, "loss": 0.21, "step": 2284 }, { "epoch": 0.62, "grad_norm": 2.657455426959781, "learning_rate": 9.192670477681224e-06, "loss": 0.2134, "step": 2285 }, { "epoch": 0.62, "grad_norm": 2.8368552341607973, "learning_rate": 9.191867361908265e-06, "loss": 0.2164, "step": 2286 }, { "epoch": 0.62, "grad_norm": 2.372927718917402, "learning_rate": 9.191063881990308e-06, "loss": 0.1829, "step": 2287 }, { "epoch": 0.62, "grad_norm": 2.4615733313114796, "learning_rate": 9.190260037997149e-06, "loss": 0.1821, "step": 2288 }, { "epoch": 0.62, "grad_norm": 3.955995801986834, "learning_rate": 9.18945582999862e-06, "loss": 0.1968, "step": 2289 }, { "epoch": 0.63, "grad_norm": 2.99586152877953, "learning_rate": 9.188651258064578e-06, "loss": 0.2663, "step": 2290 }, { "epoch": 0.63, "grad_norm": 2.3867822819661932, "learning_rate": 9.187846322264918e-06, "loss": 0.2123, "step": 2291 }, { "epoch": 0.63, "grad_norm": 2.4441363284071396, "learning_rate": 9.187041022669562e-06, "loss": 0.1639, "step": 2292 }, { "epoch": 0.63, "grad_norm": 3.2085418968273136, "learning_rate": 9.186235359348472e-06, "loss": 0.2219, "step": 2293 }, { "epoch": 0.63, "grad_norm": 2.5316935222268997, "learning_rate": 9.18542933237163e-06, "loss": 0.2067, "step": 2294 }, { "epoch": 0.63, "grad_norm": 2.4250991928934607, "learning_rate": 9.184622941809056e-06, "loss": 0.2069, "step": 2295 }, { "epoch": 0.63, "grad_norm": 2.451692458129849, "learning_rate": 9.183816187730801e-06, "loss": 0.2226, "step": 2296 }, { "epoch": 0.63, "grad_norm": 4.819762800542777, "learning_rate": 9.183009070206947e-06, "loss": 0.1914, "step": 2297 }, { "epoch": 0.63, "grad_norm": 2.1409874568250555, "learning_rate": 9.18220158930761e-06, "loss": 0.1817, "step": 2298 }, { "epoch": 0.63, "grad_norm": 2.7705844408200377, "learning_rate": 9.181393745102933e-06, "loss": 0.2391, "step": 2299 }, { "epoch": 0.63, "grad_norm": 2.3212913216712927, "learning_rate": 9.180585537663093e-06, "loss": 0.211, "step": 2300 }, { "epoch": 0.63, "grad_norm": 2.490546854112704, "learning_rate": 9.179776967058301e-06, "loss": 0.2167, "step": 2301 }, { "epoch": 0.63, "grad_norm": 2.785336218043496, "learning_rate": 9.178968033358792e-06, "loss": 0.2155, "step": 2302 }, { "epoch": 0.63, "grad_norm": 2.529619917330041, "learning_rate": 9.178158736634843e-06, "loss": 0.2219, "step": 2303 }, { "epoch": 0.63, "grad_norm": 3.141144489191175, "learning_rate": 9.177349076956755e-06, "loss": 0.2405, "step": 2304 }, { "epoch": 0.63, "grad_norm": 2.390693943568701, "learning_rate": 9.176539054394861e-06, "loss": 0.1634, "step": 2305 }, { "epoch": 0.63, "grad_norm": 2.4829239723812413, "learning_rate": 9.17572866901953e-06, "loss": 0.2161, "step": 2306 }, { "epoch": 0.63, "grad_norm": 2.3760852544012767, "learning_rate": 9.174917920901156e-06, "loss": 0.1572, "step": 2307 }, { "epoch": 0.63, "grad_norm": 2.2753702872193706, "learning_rate": 9.174106810110173e-06, "loss": 0.1803, "step": 2308 }, { "epoch": 0.63, "grad_norm": 2.424409432888883, "learning_rate": 9.173295336717039e-06, "loss": 0.2061, "step": 2309 }, { "epoch": 0.63, "grad_norm": 2.380403302759444, "learning_rate": 9.172483500792246e-06, "loss": 0.2143, "step": 2310 }, { "epoch": 0.63, "grad_norm": 2.366735171800615, "learning_rate": 9.171671302406317e-06, "loss": 0.2072, "step": 2311 }, { "epoch": 0.63, "grad_norm": 2.5404571786951338, "learning_rate": 9.17085874162981e-06, "loss": 0.1761, "step": 2312 }, { "epoch": 0.63, "grad_norm": 2.5021129312986186, "learning_rate": 9.17004581853331e-06, "loss": 0.23, "step": 2313 }, { "epoch": 0.63, "grad_norm": 2.423851749270581, "learning_rate": 9.169232533187434e-06, "loss": 0.1916, "step": 2314 }, { "epoch": 0.63, "grad_norm": 2.277235982766273, "learning_rate": 9.168418885662833e-06, "loss": 0.1872, "step": 2315 }, { "epoch": 0.63, "grad_norm": 2.524266286636938, "learning_rate": 9.16760487603019e-06, "loss": 0.2128, "step": 2316 }, { "epoch": 0.63, "grad_norm": 2.4324222436109526, "learning_rate": 9.166790504360213e-06, "loss": 0.1722, "step": 2317 }, { "epoch": 0.63, "grad_norm": 2.775058622059398, "learning_rate": 9.165975770723649e-06, "loss": 0.2067, "step": 2318 }, { "epoch": 0.63, "grad_norm": 2.7675433299186416, "learning_rate": 9.165160675191272e-06, "loss": 0.2304, "step": 2319 }, { "epoch": 0.63, "grad_norm": 2.4508904010052195, "learning_rate": 9.164345217833892e-06, "loss": 0.1771, "step": 2320 }, { "epoch": 0.63, "grad_norm": 2.928070351068871, "learning_rate": 9.163529398722341e-06, "loss": 0.1783, "step": 2321 }, { "epoch": 0.63, "grad_norm": 2.532753074523733, "learning_rate": 9.162713217927496e-06, "loss": 0.1929, "step": 2322 }, { "epoch": 0.63, "grad_norm": 2.4023598033593556, "learning_rate": 9.161896675520255e-06, "loss": 0.178, "step": 2323 }, { "epoch": 0.63, "grad_norm": 2.521448453266078, "learning_rate": 9.161079771571548e-06, "loss": 0.1823, "step": 2324 }, { "epoch": 0.63, "grad_norm": 2.2797182925069497, "learning_rate": 9.160262506152343e-06, "loss": 0.2003, "step": 2325 }, { "epoch": 0.63, "grad_norm": 2.4963420733253887, "learning_rate": 9.159444879333632e-06, "loss": 0.1956, "step": 2326 }, { "epoch": 0.64, "grad_norm": 2.5921579676631157, "learning_rate": 9.158626891186444e-06, "loss": 0.2148, "step": 2327 }, { "epoch": 0.64, "grad_norm": 2.5903548975508017, "learning_rate": 9.157808541781837e-06, "loss": 0.2428, "step": 2328 }, { "epoch": 0.64, "grad_norm": 2.688871673826555, "learning_rate": 9.1569898311909e-06, "loss": 0.1799, "step": 2329 }, { "epoch": 0.64, "grad_norm": 2.6669491093957283, "learning_rate": 9.156170759484754e-06, "loss": 0.2222, "step": 2330 }, { "epoch": 0.64, "grad_norm": 2.2530311279808366, "learning_rate": 9.15535132673455e-06, "loss": 0.1704, "step": 2331 }, { "epoch": 0.64, "grad_norm": 2.525118043745158, "learning_rate": 9.154531533011474e-06, "loss": 0.2325, "step": 2332 }, { "epoch": 0.64, "grad_norm": 2.611461519542473, "learning_rate": 9.15371137838674e-06, "loss": 0.2184, "step": 2333 }, { "epoch": 0.64, "grad_norm": 2.5116887195049347, "learning_rate": 9.152890862931594e-06, "loss": 0.1927, "step": 2334 }, { "epoch": 0.64, "grad_norm": 2.6384672348798244, "learning_rate": 9.152069986717313e-06, "loss": 0.2143, "step": 2335 }, { "epoch": 0.64, "grad_norm": 2.602262864379223, "learning_rate": 9.151248749815208e-06, "loss": 0.2304, "step": 2336 }, { "epoch": 0.64, "grad_norm": 2.4878049048128283, "learning_rate": 9.150427152296617e-06, "loss": 0.2165, "step": 2337 }, { "epoch": 0.64, "grad_norm": 2.449314616898853, "learning_rate": 9.149605194232915e-06, "loss": 0.2058, "step": 2338 }, { "epoch": 0.64, "grad_norm": 2.616224754079725, "learning_rate": 9.1487828756955e-06, "loss": 0.204, "step": 2339 }, { "epoch": 0.64, "grad_norm": 2.826569344632306, "learning_rate": 9.147960196755811e-06, "loss": 0.2051, "step": 2340 }, { "epoch": 0.64, "grad_norm": 2.632979306591604, "learning_rate": 9.147137157485313e-06, "loss": 0.2354, "step": 2341 }, { "epoch": 0.64, "grad_norm": 2.614383356719092, "learning_rate": 9.146313757955501e-06, "loss": 0.2203, "step": 2342 }, { "epoch": 0.64, "grad_norm": 2.6829268903903087, "learning_rate": 9.145489998237902e-06, "loss": 0.2123, "step": 2343 }, { "epoch": 0.64, "grad_norm": 2.7470750383820572, "learning_rate": 9.14466587840408e-06, "loss": 0.2152, "step": 2344 }, { "epoch": 0.64, "grad_norm": 2.4731689934132466, "learning_rate": 9.143841398525621e-06, "loss": 0.1678, "step": 2345 }, { "epoch": 0.64, "grad_norm": 2.7265545322283593, "learning_rate": 9.14301655867415e-06, "loss": 0.2235, "step": 2346 }, { "epoch": 0.64, "grad_norm": 2.4005540914942793, "learning_rate": 9.14219135892132e-06, "loss": 0.1817, "step": 2347 }, { "epoch": 0.64, "grad_norm": 2.63075932459022, "learning_rate": 9.141365799338817e-06, "loss": 0.222, "step": 2348 }, { "epoch": 0.64, "grad_norm": 2.637870873628337, "learning_rate": 9.140539879998353e-06, "loss": 0.2368, "step": 2349 }, { "epoch": 0.64, "grad_norm": 2.2160145430440275, "learning_rate": 9.139713600971677e-06, "loss": 0.185, "step": 2350 }, { "epoch": 0.64, "grad_norm": 2.705252575292958, "learning_rate": 9.13888696233057e-06, "loss": 0.2223, "step": 2351 }, { "epoch": 0.64, "grad_norm": 2.529558226859568, "learning_rate": 9.138059964146839e-06, "loss": 0.1875, "step": 2352 }, { "epoch": 0.64, "grad_norm": 2.364523498187168, "learning_rate": 9.137232606492323e-06, "loss": 0.2073, "step": 2353 }, { "epoch": 0.64, "grad_norm": 2.5583423616556535, "learning_rate": 9.136404889438898e-06, "loss": 0.2287, "step": 2354 }, { "epoch": 0.64, "grad_norm": 2.4761735879009543, "learning_rate": 9.135576813058465e-06, "loss": 0.1977, "step": 2355 }, { "epoch": 0.64, "grad_norm": 2.7569134643346818, "learning_rate": 9.134748377422959e-06, "loss": 0.2077, "step": 2356 }, { "epoch": 0.64, "grad_norm": 2.29017605428271, "learning_rate": 9.133919582604344e-06, "loss": 0.1863, "step": 2357 }, { "epoch": 0.64, "grad_norm": 2.7071280771654864, "learning_rate": 9.133090428674621e-06, "loss": 0.1938, "step": 2358 }, { "epoch": 0.64, "grad_norm": 2.6404789634689285, "learning_rate": 9.132260915705814e-06, "loss": 0.1861, "step": 2359 }, { "epoch": 0.64, "grad_norm": 3.1560569466912836, "learning_rate": 9.131431043769986e-06, "loss": 0.2064, "step": 2360 }, { "epoch": 0.64, "grad_norm": 2.1571917601712345, "learning_rate": 9.130600812939223e-06, "loss": 0.1698, "step": 2361 }, { "epoch": 0.64, "grad_norm": 2.517039789499306, "learning_rate": 9.12977022328565e-06, "loss": 0.201, "step": 2362 }, { "epoch": 0.65, "grad_norm": 2.6096427349526934, "learning_rate": 9.12893927488142e-06, "loss": 0.2066, "step": 2363 }, { "epoch": 0.65, "grad_norm": 2.35847259995961, "learning_rate": 9.128107967798716e-06, "loss": 0.1938, "step": 2364 }, { "epoch": 0.65, "grad_norm": 2.5139641439215072, "learning_rate": 9.127276302109751e-06, "loss": 0.1883, "step": 2365 }, { "epoch": 0.65, "grad_norm": 2.5282524066097487, "learning_rate": 9.126444277886775e-06, "loss": 0.2252, "step": 2366 }, { "epoch": 0.65, "grad_norm": 2.2701435754023134, "learning_rate": 9.125611895202062e-06, "loss": 0.1505, "step": 2367 }, { "epoch": 0.65, "grad_norm": 2.31553176464601, "learning_rate": 9.124779154127925e-06, "loss": 0.1861, "step": 2368 }, { "epoch": 0.65, "grad_norm": 2.5582724634325844, "learning_rate": 9.123946054736699e-06, "loss": 0.192, "step": 2369 }, { "epoch": 0.65, "grad_norm": 2.4666701265703606, "learning_rate": 9.123112597100759e-06, "loss": 0.1873, "step": 2370 }, { "epoch": 0.65, "grad_norm": 2.4398544974358582, "learning_rate": 9.122278781292502e-06, "loss": 0.1956, "step": 2371 }, { "epoch": 0.65, "grad_norm": 2.5212396981436584, "learning_rate": 9.121444607384366e-06, "loss": 0.2083, "step": 2372 }, { "epoch": 0.65, "grad_norm": 2.4531087112740484, "learning_rate": 9.120610075448812e-06, "loss": 0.2159, "step": 2373 }, { "epoch": 0.65, "grad_norm": 2.521625924341954, "learning_rate": 9.119775185558337e-06, "loss": 0.205, "step": 2374 }, { "epoch": 0.65, "grad_norm": 2.9055400242238525, "learning_rate": 9.118939937785468e-06, "loss": 0.2391, "step": 2375 }, { "epoch": 0.65, "grad_norm": 3.127400798654293, "learning_rate": 9.11810433220276e-06, "loss": 0.2125, "step": 2376 }, { "epoch": 0.65, "grad_norm": 2.712235334496407, "learning_rate": 9.117268368882804e-06, "loss": 0.2485, "step": 2377 }, { "epoch": 0.65, "grad_norm": 2.244793115850796, "learning_rate": 9.116432047898218e-06, "loss": 0.1939, "step": 2378 }, { "epoch": 0.65, "grad_norm": 2.547615944695285, "learning_rate": 9.115595369321653e-06, "loss": 0.2319, "step": 2379 }, { "epoch": 0.65, "grad_norm": 2.5217760753167404, "learning_rate": 9.11475833322579e-06, "loss": 0.1851, "step": 2380 }, { "epoch": 0.65, "grad_norm": 2.662425007047709, "learning_rate": 9.113920939683343e-06, "loss": 0.2018, "step": 2381 }, { "epoch": 0.65, "grad_norm": 2.450000356913756, "learning_rate": 9.113083188767057e-06, "loss": 0.2029, "step": 2382 }, { "epoch": 0.65, "grad_norm": 2.707311504512976, "learning_rate": 9.112245080549705e-06, "loss": 0.2436, "step": 2383 }, { "epoch": 0.65, "grad_norm": 2.327568324823103, "learning_rate": 9.111406615104093e-06, "loss": 0.1746, "step": 2384 }, { "epoch": 0.65, "grad_norm": 2.6303578016025506, "learning_rate": 9.11056779250306e-06, "loss": 0.2035, "step": 2385 }, { "epoch": 0.65, "grad_norm": 2.473639109241671, "learning_rate": 9.10972861281947e-06, "loss": 0.1922, "step": 2386 }, { "epoch": 0.65, "grad_norm": 2.3177880383312552, "learning_rate": 9.108889076126226e-06, "loss": 0.1689, "step": 2387 }, { "epoch": 0.65, "grad_norm": 2.357540115084761, "learning_rate": 9.108049182496258e-06, "loss": 0.1958, "step": 2388 }, { "epoch": 0.65, "grad_norm": 2.4387198546760693, "learning_rate": 9.107208932002524e-06, "loss": 0.2026, "step": 2389 }, { "epoch": 0.65, "grad_norm": 2.4623057030615767, "learning_rate": 9.106368324718018e-06, "loss": 0.1777, "step": 2390 }, { "epoch": 0.65, "grad_norm": 2.4349492491255837, "learning_rate": 9.105527360715762e-06, "loss": 0.2273, "step": 2391 }, { "epoch": 0.65, "grad_norm": 2.5841649533417765, "learning_rate": 9.104686040068813e-06, "loss": 0.225, "step": 2392 }, { "epoch": 0.65, "grad_norm": 3.473556789946725, "learning_rate": 9.103844362850252e-06, "loss": 0.1945, "step": 2393 }, { "epoch": 0.65, "grad_norm": 2.2122557317828595, "learning_rate": 9.103002329133198e-06, "loss": 0.1831, "step": 2394 }, { "epoch": 0.65, "grad_norm": 2.2404121280900147, "learning_rate": 9.102159938990795e-06, "loss": 0.1823, "step": 2395 }, { "epoch": 0.65, "grad_norm": 2.4931415163162076, "learning_rate": 9.101317192496223e-06, "loss": 0.1873, "step": 2396 }, { "epoch": 0.65, "grad_norm": 2.5390114877106584, "learning_rate": 9.100474089722693e-06, "loss": 0.2256, "step": 2397 }, { "epoch": 0.65, "grad_norm": 2.33274111323077, "learning_rate": 9.09963063074344e-06, "loss": 0.1917, "step": 2398 }, { "epoch": 0.65, "grad_norm": 2.533292050495369, "learning_rate": 9.09878681563174e-06, "loss": 0.2359, "step": 2399 }, { "epoch": 0.66, "grad_norm": 2.5700281243605305, "learning_rate": 9.097942644460889e-06, "loss": 0.2128, "step": 2400 }, { "epoch": 0.66, "grad_norm": 2.4012519604445712, "learning_rate": 9.097098117304223e-06, "loss": 0.1974, "step": 2401 }, { "epoch": 0.66, "grad_norm": 2.344381550523882, "learning_rate": 9.096253234235106e-06, "loss": 0.1738, "step": 2402 }, { "epoch": 0.66, "grad_norm": 2.489285497686882, "learning_rate": 9.095407995326932e-06, "loss": 0.1938, "step": 2403 }, { "epoch": 0.66, "grad_norm": 2.095489186777263, "learning_rate": 9.094562400653127e-06, "loss": 0.1659, "step": 2404 }, { "epoch": 0.66, "grad_norm": 2.556714026206172, "learning_rate": 9.093716450287144e-06, "loss": 0.1799, "step": 2405 }, { "epoch": 0.66, "grad_norm": 2.3237891630283176, "learning_rate": 9.092870144302473e-06, "loss": 0.1819, "step": 2406 }, { "epoch": 0.66, "grad_norm": 2.5541341537198963, "learning_rate": 9.092023482772632e-06, "loss": 0.2073, "step": 2407 }, { "epoch": 0.66, "grad_norm": 2.3328547608535075, "learning_rate": 9.09117646577117e-06, "loss": 0.1799, "step": 2408 }, { "epoch": 0.66, "grad_norm": 2.859401939505249, "learning_rate": 9.090329093371667e-06, "loss": 0.2167, "step": 2409 }, { "epoch": 0.66, "grad_norm": 2.818978506251009, "learning_rate": 9.089481365647731e-06, "loss": 0.2378, "step": 2410 }, { "epoch": 0.66, "grad_norm": 2.3793785038393134, "learning_rate": 9.088633282673007e-06, "loss": 0.2031, "step": 2411 }, { "epoch": 0.66, "grad_norm": 2.551510400384724, "learning_rate": 9.087784844521165e-06, "loss": 0.1998, "step": 2412 }, { "epoch": 0.66, "grad_norm": 2.575366290435541, "learning_rate": 9.086936051265911e-06, "loss": 0.1912, "step": 2413 }, { "epoch": 0.66, "grad_norm": 2.388430625458781, "learning_rate": 9.086086902980977e-06, "loss": 0.1829, "step": 2414 }, { "epoch": 0.66, "grad_norm": 2.3011581260158582, "learning_rate": 9.08523739974013e-06, "loss": 0.1944, "step": 2415 }, { "epoch": 0.66, "grad_norm": 2.688993983604584, "learning_rate": 9.084387541617163e-06, "loss": 0.2373, "step": 2416 }, { "epoch": 0.66, "grad_norm": 2.280031351020771, "learning_rate": 9.083537328685905e-06, "loss": 0.186, "step": 2417 }, { "epoch": 0.66, "grad_norm": 2.278659189003653, "learning_rate": 9.082686761020213e-06, "loss": 0.1908, "step": 2418 }, { "epoch": 0.66, "grad_norm": 2.358950756818462, "learning_rate": 9.081835838693975e-06, "loss": 0.1911, "step": 2419 }, { "epoch": 0.66, "grad_norm": 2.127556016921645, "learning_rate": 9.08098456178111e-06, "loss": 0.1829, "step": 2420 }, { "epoch": 0.66, "grad_norm": 2.5318188618630315, "learning_rate": 9.080132930355567e-06, "loss": 0.2198, "step": 2421 }, { "epoch": 0.66, "grad_norm": 2.79596352908327, "learning_rate": 9.079280944491328e-06, "loss": 0.2098, "step": 2422 }, { "epoch": 0.66, "grad_norm": 2.3481607225525325, "learning_rate": 9.078428604262404e-06, "loss": 0.1626, "step": 2423 }, { "epoch": 0.66, "grad_norm": 2.1612195744560623, "learning_rate": 9.07757590974284e-06, "loss": 0.1624, "step": 2424 }, { "epoch": 0.66, "grad_norm": 2.5113771269556318, "learning_rate": 9.076722861006703e-06, "loss": 0.2046, "step": 2425 }, { "epoch": 0.66, "grad_norm": 2.599015448839853, "learning_rate": 9.075869458128104e-06, "loss": 0.2052, "step": 2426 }, { "epoch": 0.66, "grad_norm": 2.61695649889905, "learning_rate": 9.075015701181171e-06, "loss": 0.2105, "step": 2427 }, { "epoch": 0.66, "grad_norm": 2.1843430611268624, "learning_rate": 9.074161590240073e-06, "loss": 0.2002, "step": 2428 }, { "epoch": 0.66, "grad_norm": 2.6209019914824756, "learning_rate": 9.073307125379007e-06, "loss": 0.2016, "step": 2429 }, { "epoch": 0.66, "grad_norm": 2.7647355309744546, "learning_rate": 9.072452306672197e-06, "loss": 0.188, "step": 2430 }, { "epoch": 0.66, "grad_norm": 2.377126561331576, "learning_rate": 9.071597134193902e-06, "loss": 0.2037, "step": 2431 }, { "epoch": 0.66, "grad_norm": 2.4947093898491084, "learning_rate": 9.070741608018412e-06, "loss": 0.2051, "step": 2432 }, { "epoch": 0.66, "grad_norm": 2.2788322416194218, "learning_rate": 9.06988572822004e-06, "loss": 0.1962, "step": 2433 }, { "epoch": 0.66, "grad_norm": 2.0697105314086937, "learning_rate": 9.069029494873143e-06, "loss": 0.1682, "step": 2434 }, { "epoch": 0.66, "grad_norm": 2.7096674897716584, "learning_rate": 9.0681729080521e-06, "loss": 0.1893, "step": 2435 }, { "epoch": 0.67, "grad_norm": 2.6083361680593873, "learning_rate": 9.067315967831318e-06, "loss": 0.2095, "step": 2436 }, { "epoch": 0.67, "grad_norm": 2.4684044310368867, "learning_rate": 9.066458674285244e-06, "loss": 0.1899, "step": 2437 }, { "epoch": 0.67, "grad_norm": 2.3594764790592024, "learning_rate": 9.065601027488345e-06, "loss": 0.1698, "step": 2438 }, { "epoch": 0.67, "grad_norm": 2.6024058463927062, "learning_rate": 9.064743027515127e-06, "loss": 0.2011, "step": 2439 }, { "epoch": 0.67, "grad_norm": 2.482605069921783, "learning_rate": 9.06388467444013e-06, "loss": 0.1787, "step": 2440 }, { "epoch": 0.67, "grad_norm": 3.0751819433097958, "learning_rate": 9.063025968337909e-06, "loss": 0.2365, "step": 2441 }, { "epoch": 0.67, "grad_norm": 2.4685563561276096, "learning_rate": 9.062166909283062e-06, "loss": 0.2171, "step": 2442 }, { "epoch": 0.67, "grad_norm": 2.5874773352701843, "learning_rate": 9.061307497350218e-06, "loss": 0.2034, "step": 2443 }, { "epoch": 0.67, "grad_norm": 2.4994176068440694, "learning_rate": 9.060447732614032e-06, "loss": 0.1794, "step": 2444 }, { "epoch": 0.67, "grad_norm": 2.747185862872241, "learning_rate": 9.05958761514919e-06, "loss": 0.1889, "step": 2445 }, { "epoch": 0.67, "grad_norm": 2.312992293654448, "learning_rate": 9.058727145030412e-06, "loss": 0.1939, "step": 2446 }, { "epoch": 0.67, "grad_norm": 2.284478786525657, "learning_rate": 9.057866322332444e-06, "loss": 0.1795, "step": 2447 }, { "epoch": 0.67, "grad_norm": 2.505819551045151, "learning_rate": 9.057005147130069e-06, "loss": 0.2145, "step": 2448 }, { "epoch": 0.67, "grad_norm": 2.644106449204228, "learning_rate": 9.056143619498092e-06, "loss": 0.2226, "step": 2449 }, { "epoch": 0.67, "grad_norm": 2.5437561761628147, "learning_rate": 9.055281739511357e-06, "loss": 0.2118, "step": 2450 }, { "epoch": 0.67, "grad_norm": 2.3994728772589786, "learning_rate": 9.054419507244733e-06, "loss": 0.2066, "step": 2451 }, { "epoch": 0.67, "grad_norm": 2.6583797683100525, "learning_rate": 9.053556922773123e-06, "loss": 0.243, "step": 2452 }, { "epoch": 0.67, "grad_norm": 2.24078021872897, "learning_rate": 9.052693986171458e-06, "loss": 0.2034, "step": 2453 }, { "epoch": 0.67, "grad_norm": 2.8615102174164577, "learning_rate": 9.0518306975147e-06, "loss": 0.2117, "step": 2454 }, { "epoch": 0.67, "grad_norm": 2.6016578727282496, "learning_rate": 9.050967056877846e-06, "loss": 0.184, "step": 2455 }, { "epoch": 0.67, "grad_norm": 2.6024994900214606, "learning_rate": 9.050103064335918e-06, "loss": 0.2334, "step": 2456 }, { "epoch": 0.67, "grad_norm": 2.565712321449078, "learning_rate": 9.049238719963968e-06, "loss": 0.2069, "step": 2457 }, { "epoch": 0.67, "grad_norm": 2.3959642164561785, "learning_rate": 9.048374023837086e-06, "loss": 0.1892, "step": 2458 }, { "epoch": 0.67, "grad_norm": 2.4830850896312624, "learning_rate": 9.047508976030383e-06, "loss": 0.1815, "step": 2459 }, { "epoch": 0.67, "grad_norm": 2.3611934810103556, "learning_rate": 9.046643576619007e-06, "loss": 0.2017, "step": 2460 }, { "epoch": 0.67, "grad_norm": 2.419255193019906, "learning_rate": 9.045777825678135e-06, "loss": 0.1934, "step": 2461 }, { "epoch": 0.67, "grad_norm": 2.759285035517681, "learning_rate": 9.044911723282974e-06, "loss": 0.2286, "step": 2462 }, { "epoch": 0.67, "grad_norm": 2.2771914060996736, "learning_rate": 9.044045269508762e-06, "loss": 0.1905, "step": 2463 }, { "epoch": 0.67, "grad_norm": 2.1274783459043265, "learning_rate": 9.043178464430767e-06, "loss": 0.1773, "step": 2464 }, { "epoch": 0.67, "grad_norm": 2.045621177965386, "learning_rate": 9.042311308124287e-06, "loss": 0.1873, "step": 2465 }, { "epoch": 0.67, "grad_norm": 2.319864296319806, "learning_rate": 9.041443800664653e-06, "loss": 0.1906, "step": 2466 }, { "epoch": 0.67, "grad_norm": 2.4311581086363736, "learning_rate": 9.040575942127225e-06, "loss": 0.2106, "step": 2467 }, { "epoch": 0.67, "grad_norm": 2.549959377963562, "learning_rate": 9.039707732587393e-06, "loss": 0.2248, "step": 2468 }, { "epoch": 0.67, "grad_norm": 2.440690134697464, "learning_rate": 9.038839172120575e-06, "loss": 0.2064, "step": 2469 }, { "epoch": 0.67, "grad_norm": 2.524740913080514, "learning_rate": 9.037970260802227e-06, "loss": 0.2381, "step": 2470 }, { "epoch": 0.67, "grad_norm": 2.5354858092078034, "learning_rate": 9.037100998707829e-06, "loss": 0.2091, "step": 2471 }, { "epoch": 0.67, "grad_norm": 2.9763266121004284, "learning_rate": 9.03623138591289e-06, "loss": 0.2129, "step": 2472 }, { "epoch": 0.68, "grad_norm": 2.565062421558977, "learning_rate": 9.035361422492956e-06, "loss": 0.2157, "step": 2473 }, { "epoch": 0.68, "grad_norm": 2.3279172146755873, "learning_rate": 9.034491108523603e-06, "loss": 0.2095, "step": 2474 }, { "epoch": 0.68, "grad_norm": 2.7564536824755943, "learning_rate": 9.033620444080427e-06, "loss": 0.2123, "step": 2475 }, { "epoch": 0.68, "grad_norm": 2.192012728465349, "learning_rate": 9.032749429239069e-06, "loss": 0.1692, "step": 2476 }, { "epoch": 0.68, "grad_norm": 2.1868698557802526, "learning_rate": 9.03187806407519e-06, "loss": 0.1572, "step": 2477 }, { "epoch": 0.68, "grad_norm": 2.5350585643309613, "learning_rate": 9.031006348664487e-06, "loss": 0.1739, "step": 2478 }, { "epoch": 0.68, "grad_norm": 2.3235306470499935, "learning_rate": 9.030134283082683e-06, "loss": 0.2116, "step": 2479 }, { "epoch": 0.68, "grad_norm": 2.3368059019653877, "learning_rate": 9.029261867405536e-06, "loss": 0.1881, "step": 2480 }, { "epoch": 0.68, "grad_norm": 1.983056906125631, "learning_rate": 9.028389101708833e-06, "loss": 0.1533, "step": 2481 }, { "epoch": 0.68, "grad_norm": 2.510747102752466, "learning_rate": 9.027515986068387e-06, "loss": 0.1907, "step": 2482 }, { "epoch": 0.68, "grad_norm": 2.3864579964373323, "learning_rate": 9.026642520560047e-06, "loss": 0.229, "step": 2483 }, { "epoch": 0.68, "grad_norm": 2.472303629375403, "learning_rate": 9.025768705259691e-06, "loss": 0.1719, "step": 2484 }, { "epoch": 0.68, "grad_norm": 2.514599661066843, "learning_rate": 9.024894540243227e-06, "loss": 0.1832, "step": 2485 }, { "epoch": 0.68, "grad_norm": 2.6526308781800343, "learning_rate": 9.024020025586592e-06, "loss": 0.1982, "step": 2486 }, { "epoch": 0.68, "grad_norm": 2.529140028023631, "learning_rate": 9.023145161365755e-06, "loss": 0.1875, "step": 2487 }, { "epoch": 0.68, "grad_norm": 2.219666914906933, "learning_rate": 9.022269947656714e-06, "loss": 0.172, "step": 2488 }, { "epoch": 0.68, "grad_norm": 2.2115965038405214, "learning_rate": 9.0213943845355e-06, "loss": 0.1639, "step": 2489 }, { "epoch": 0.68, "grad_norm": 2.5239739531750796, "learning_rate": 9.020518472078172e-06, "loss": 0.1766, "step": 2490 }, { "epoch": 0.68, "grad_norm": 2.7168355638313866, "learning_rate": 9.019642210360821e-06, "loss": 0.2226, "step": 2491 }, { "epoch": 0.68, "grad_norm": 2.6850842491003104, "learning_rate": 9.018765599459564e-06, "loss": 0.1593, "step": 2492 }, { "epoch": 0.68, "grad_norm": 2.157432193569933, "learning_rate": 9.017888639450557e-06, "loss": 0.1671, "step": 2493 }, { "epoch": 0.68, "grad_norm": 2.1735623248198594, "learning_rate": 9.017011330409975e-06, "loss": 0.1777, "step": 2494 }, { "epoch": 0.68, "grad_norm": 2.531257473174138, "learning_rate": 9.016133672414034e-06, "loss": 0.2359, "step": 2495 }, { "epoch": 0.68, "grad_norm": 2.6663386506280053, "learning_rate": 9.015255665538972e-06, "loss": 0.2259, "step": 2496 }, { "epoch": 0.68, "grad_norm": 2.46173077673191, "learning_rate": 9.014377309861064e-06, "loss": 0.2247, "step": 2497 }, { "epoch": 0.68, "grad_norm": 2.5898726981211895, "learning_rate": 9.01349860545661e-06, "loss": 0.1998, "step": 2498 }, { "epoch": 0.68, "grad_norm": 2.55733901522693, "learning_rate": 9.012619552401945e-06, "loss": 0.1852, "step": 2499 }, { "epoch": 0.68, "grad_norm": 2.5922058456675705, "learning_rate": 9.01174015077343e-06, "loss": 0.2169, "step": 2500 }, { "epoch": 0.68, "grad_norm": 2.432134587314923, "learning_rate": 9.010860400647457e-06, "loss": 0.179, "step": 2501 }, { "epoch": 0.68, "grad_norm": 2.7066590187228057, "learning_rate": 9.009980302100452e-06, "loss": 0.2221, "step": 2502 }, { "epoch": 0.68, "grad_norm": 2.1703036328767773, "learning_rate": 9.009099855208867e-06, "loss": 0.1749, "step": 2503 }, { "epoch": 0.68, "grad_norm": 2.3696854242886443, "learning_rate": 9.008219060049188e-06, "loss": 0.2019, "step": 2504 }, { "epoch": 0.68, "grad_norm": 2.6393038590768536, "learning_rate": 9.007337916697925e-06, "loss": 0.1896, "step": 2505 }, { "epoch": 0.68, "grad_norm": 2.2458042418158373, "learning_rate": 9.006456425231624e-06, "loss": 0.171, "step": 2506 }, { "epoch": 0.68, "grad_norm": 2.4766936086919618, "learning_rate": 9.005574585726864e-06, "loss": 0.1838, "step": 2507 }, { "epoch": 0.68, "grad_norm": 2.4339046291804265, "learning_rate": 9.004692398260243e-06, "loss": 0.2033, "step": 2508 }, { "epoch": 0.68, "grad_norm": 2.1070770820711737, "learning_rate": 9.003809862908401e-06, "loss": 0.165, "step": 2509 }, { "epoch": 0.69, "grad_norm": 2.3495901576145233, "learning_rate": 9.002926979748003e-06, "loss": 0.1534, "step": 2510 }, { "epoch": 0.69, "grad_norm": 2.679900667081155, "learning_rate": 9.002043748855742e-06, "loss": 0.1875, "step": 2511 }, { "epoch": 0.69, "grad_norm": 2.278490696235511, "learning_rate": 9.001160170308346e-06, "loss": 0.1882, "step": 2512 }, { "epoch": 0.69, "grad_norm": 2.2114416143034665, "learning_rate": 9.000276244182567e-06, "loss": 0.1469, "step": 2513 }, { "epoch": 0.69, "grad_norm": 2.14285991408518, "learning_rate": 8.999391970555197e-06, "loss": 0.1727, "step": 2514 }, { "epoch": 0.69, "grad_norm": 2.516106427016791, "learning_rate": 8.998507349503048e-06, "loss": 0.1956, "step": 2515 }, { "epoch": 0.69, "grad_norm": 2.3448306359367037, "learning_rate": 8.997622381102968e-06, "loss": 0.1837, "step": 2516 }, { "epoch": 0.69, "grad_norm": 2.3617013676775653, "learning_rate": 8.996737065431834e-06, "loss": 0.1828, "step": 2517 }, { "epoch": 0.69, "grad_norm": 2.760758996202912, "learning_rate": 8.995851402566553e-06, "loss": 0.2136, "step": 2518 }, { "epoch": 0.69, "grad_norm": 2.6919894177214885, "learning_rate": 8.99496539258406e-06, "loss": 0.2138, "step": 2519 }, { "epoch": 0.69, "grad_norm": 2.578201382299165, "learning_rate": 8.994079035561325e-06, "loss": 0.1909, "step": 2520 }, { "epoch": 0.69, "grad_norm": 2.384298749070819, "learning_rate": 8.993192331575342e-06, "loss": 0.212, "step": 2521 }, { "epoch": 0.69, "grad_norm": 2.567503057419424, "learning_rate": 8.992305280703141e-06, "loss": 0.195, "step": 2522 }, { "epoch": 0.69, "grad_norm": 2.1983066858698934, "learning_rate": 8.99141788302178e-06, "loss": 0.1862, "step": 2523 }, { "epoch": 0.69, "grad_norm": 2.648225708757083, "learning_rate": 8.990530138608346e-06, "loss": 0.1958, "step": 2524 }, { "epoch": 0.69, "grad_norm": 2.75944415053918, "learning_rate": 8.989642047539956e-06, "loss": 0.2242, "step": 2525 }, { "epoch": 0.69, "grad_norm": 2.270021089247332, "learning_rate": 8.988753609893757e-06, "loss": 0.1976, "step": 2526 }, { "epoch": 0.69, "grad_norm": 2.220451854767584, "learning_rate": 8.987864825746929e-06, "loss": 0.175, "step": 2527 }, { "epoch": 0.69, "grad_norm": 2.1935898661889714, "learning_rate": 8.986975695176678e-06, "loss": 0.1887, "step": 2528 }, { "epoch": 0.69, "grad_norm": 2.3983034854133365, "learning_rate": 8.986086218260247e-06, "loss": 0.1734, "step": 2529 }, { "epoch": 0.69, "grad_norm": 2.5217345647835656, "learning_rate": 8.985196395074899e-06, "loss": 0.2166, "step": 2530 }, { "epoch": 0.69, "grad_norm": 2.3942090099163704, "learning_rate": 8.984306225697935e-06, "loss": 0.1987, "step": 2531 }, { "epoch": 0.69, "grad_norm": 2.357183499918745, "learning_rate": 8.983415710206683e-06, "loss": 0.1847, "step": 2532 }, { "epoch": 0.69, "grad_norm": 2.4056164139368894, "learning_rate": 8.982524848678502e-06, "loss": 0.1946, "step": 2533 }, { "epoch": 0.69, "grad_norm": 2.273062218632727, "learning_rate": 8.981633641190779e-06, "loss": 0.1757, "step": 2534 }, { "epoch": 0.69, "grad_norm": 2.3669871095189774, "learning_rate": 8.980742087820935e-06, "loss": 0.164, "step": 2535 }, { "epoch": 0.69, "grad_norm": 2.7304213900609895, "learning_rate": 8.979850188646418e-06, "loss": 0.2254, "step": 2536 }, { "epoch": 0.69, "grad_norm": 2.669376962879526, "learning_rate": 8.978957943744703e-06, "loss": 0.168, "step": 2537 }, { "epoch": 0.69, "grad_norm": 2.586408129218207, "learning_rate": 8.978065353193305e-06, "loss": 0.2103, "step": 2538 }, { "epoch": 0.69, "grad_norm": 2.9451901423347504, "learning_rate": 8.97717241706976e-06, "loss": 0.1771, "step": 2539 }, { "epoch": 0.69, "grad_norm": 2.367741511715015, "learning_rate": 8.976279135451636e-06, "loss": 0.2065, "step": 2540 }, { "epoch": 0.69, "grad_norm": 2.539884020529156, "learning_rate": 8.975385508416532e-06, "loss": 0.2096, "step": 2541 }, { "epoch": 0.69, "grad_norm": 2.298470116584281, "learning_rate": 8.974491536042079e-06, "loss": 0.1823, "step": 2542 }, { "epoch": 0.69, "grad_norm": 2.5577680136098957, "learning_rate": 8.973597218405931e-06, "loss": 0.2171, "step": 2543 }, { "epoch": 0.69, "grad_norm": 2.3682258359476225, "learning_rate": 8.972702555585783e-06, "loss": 0.1696, "step": 2544 }, { "epoch": 0.69, "grad_norm": 2.45072881335195, "learning_rate": 8.971807547659349e-06, "loss": 0.2182, "step": 2545 }, { "epoch": 0.7, "grad_norm": 2.225956724766786, "learning_rate": 8.970912194704379e-06, "loss": 0.1949, "step": 2546 }, { "epoch": 0.7, "grad_norm": 2.2924213374139786, "learning_rate": 8.970016496798655e-06, "loss": 0.1555, "step": 2547 }, { "epoch": 0.7, "grad_norm": 2.3598353685315243, "learning_rate": 8.969120454019983e-06, "loss": 0.1968, "step": 2548 }, { "epoch": 0.7, "grad_norm": 2.140298079840243, "learning_rate": 8.9682240664462e-06, "loss": 0.1939, "step": 2549 }, { "epoch": 0.7, "grad_norm": 2.5966158436583457, "learning_rate": 8.967327334155179e-06, "loss": 0.1976, "step": 2550 }, { "epoch": 0.7, "grad_norm": 2.110009483547678, "learning_rate": 8.966430257224814e-06, "loss": 0.1868, "step": 2551 }, { "epoch": 0.7, "grad_norm": 2.469090312716522, "learning_rate": 8.965532835733035e-06, "loss": 0.2249, "step": 2552 }, { "epoch": 0.7, "grad_norm": 2.370113641352872, "learning_rate": 8.964635069757803e-06, "loss": 0.2061, "step": 2553 }, { "epoch": 0.7, "grad_norm": 2.546990219077739, "learning_rate": 8.963736959377103e-06, "loss": 0.2428, "step": 2554 }, { "epoch": 0.7, "grad_norm": 2.3509801057604114, "learning_rate": 8.962838504668956e-06, "loss": 0.1919, "step": 2555 }, { "epoch": 0.7, "grad_norm": 2.5454718768347426, "learning_rate": 8.961939705711407e-06, "loss": 0.1909, "step": 2556 }, { "epoch": 0.7, "grad_norm": 2.3623847434195118, "learning_rate": 8.96104056258254e-06, "loss": 0.2182, "step": 2557 }, { "epoch": 0.7, "grad_norm": 2.4994084402990775, "learning_rate": 8.960141075360455e-06, "loss": 0.2169, "step": 2558 }, { "epoch": 0.7, "grad_norm": 2.7713601720157772, "learning_rate": 8.959241244123296e-06, "loss": 0.2162, "step": 2559 }, { "epoch": 0.7, "grad_norm": 2.510208846466004, "learning_rate": 8.95834106894923e-06, "loss": 0.1802, "step": 2560 }, { "epoch": 0.7, "grad_norm": 2.683542890183487, "learning_rate": 8.95744054991645e-06, "loss": 0.22, "step": 2561 }, { "epoch": 0.7, "grad_norm": 2.2349075239697282, "learning_rate": 8.95653968710319e-06, "loss": 0.1904, "step": 2562 }, { "epoch": 0.7, "grad_norm": 2.4025046067993463, "learning_rate": 8.955638480587705e-06, "loss": 0.1888, "step": 2563 }, { "epoch": 0.7, "grad_norm": 2.3291027823376704, "learning_rate": 8.95473693044828e-06, "loss": 0.1932, "step": 2564 }, { "epoch": 0.7, "grad_norm": 2.3294861762512635, "learning_rate": 8.953835036763234e-06, "loss": 0.1829, "step": 2565 }, { "epoch": 0.7, "grad_norm": 2.2631303033466508, "learning_rate": 8.952932799610916e-06, "loss": 0.1981, "step": 2566 }, { "epoch": 0.7, "grad_norm": 2.6347584544532427, "learning_rate": 8.952030219069699e-06, "loss": 0.1978, "step": 2567 }, { "epoch": 0.7, "grad_norm": 2.3729548502433104, "learning_rate": 8.951127295217991e-06, "loss": 0.1992, "step": 2568 }, { "epoch": 0.7, "grad_norm": 2.3796328147454724, "learning_rate": 8.950224028134228e-06, "loss": 0.1749, "step": 2569 }, { "epoch": 0.7, "grad_norm": 2.2032308613495553, "learning_rate": 8.949320417896878e-06, "loss": 0.1623, "step": 2570 }, { "epoch": 0.7, "grad_norm": 2.349496398032231, "learning_rate": 8.948416464584437e-06, "loss": 0.1738, "step": 2571 }, { "epoch": 0.7, "grad_norm": 2.4801402860656125, "learning_rate": 8.94751216827543e-06, "loss": 0.2164, "step": 2572 }, { "epoch": 0.7, "grad_norm": 2.616846553003231, "learning_rate": 8.946607529048413e-06, "loss": 0.2054, "step": 2573 }, { "epoch": 0.7, "grad_norm": 2.5664926276413067, "learning_rate": 8.94570254698197e-06, "loss": 0.1987, "step": 2574 }, { "epoch": 0.7, "grad_norm": 2.595175724344372, "learning_rate": 8.944797222154717e-06, "loss": 0.1782, "step": 2575 }, { "epoch": 0.7, "grad_norm": 2.4764064047660423, "learning_rate": 8.943891554645298e-06, "loss": 0.2036, "step": 2576 }, { "epoch": 0.7, "grad_norm": 2.50359201275801, "learning_rate": 8.942985544532392e-06, "loss": 0.1792, "step": 2577 }, { "epoch": 0.7, "grad_norm": 2.2853237458671276, "learning_rate": 8.942079191894699e-06, "loss": 0.1869, "step": 2578 }, { "epoch": 0.7, "grad_norm": 2.4175671303708506, "learning_rate": 8.941172496810956e-06, "loss": 0.2111, "step": 2579 }, { "epoch": 0.7, "grad_norm": 2.4312945112777866, "learning_rate": 8.940265459359927e-06, "loss": 0.2159, "step": 2580 }, { "epoch": 0.7, "grad_norm": 2.3866714982493313, "learning_rate": 8.939358079620404e-06, "loss": 0.2222, "step": 2581 }, { "epoch": 0.7, "grad_norm": 2.507593823712348, "learning_rate": 8.938450357671211e-06, "loss": 0.1952, "step": 2582 }, { "epoch": 0.71, "grad_norm": 2.4490264204042433, "learning_rate": 8.937542293591201e-06, "loss": 0.2059, "step": 2583 }, { "epoch": 0.71, "grad_norm": 2.5427577795874403, "learning_rate": 8.936633887459259e-06, "loss": 0.2066, "step": 2584 }, { "epoch": 0.71, "grad_norm": 2.0981010453887894, "learning_rate": 8.935725139354296e-06, "loss": 0.1692, "step": 2585 }, { "epoch": 0.71, "grad_norm": 2.4256197886371558, "learning_rate": 8.934816049355255e-06, "loss": 0.1774, "step": 2586 }, { "epoch": 0.71, "grad_norm": 2.561659906225474, "learning_rate": 8.933906617541107e-06, "loss": 0.1897, "step": 2587 }, { "epoch": 0.71, "grad_norm": 2.455147206768468, "learning_rate": 8.932996843990855e-06, "loss": 0.2136, "step": 2588 }, { "epoch": 0.71, "grad_norm": 2.671623848703675, "learning_rate": 8.932086728783531e-06, "loss": 0.2089, "step": 2589 }, { "epoch": 0.71, "grad_norm": 2.4538046001696907, "learning_rate": 8.931176271998195e-06, "loss": 0.181, "step": 2590 }, { "epoch": 0.71, "grad_norm": 2.1588708352543704, "learning_rate": 8.930265473713939e-06, "loss": 0.1803, "step": 2591 }, { "epoch": 0.71, "grad_norm": 2.4808194384474196, "learning_rate": 8.92935433400988e-06, "loss": 0.2281, "step": 2592 }, { "epoch": 0.71, "grad_norm": 2.342660061878623, "learning_rate": 8.928442852965174e-06, "loss": 0.1996, "step": 2593 }, { "epoch": 0.71, "grad_norm": 2.3213882097449123, "learning_rate": 8.927531030658996e-06, "loss": 0.1862, "step": 2594 }, { "epoch": 0.71, "grad_norm": 2.279203330837764, "learning_rate": 8.926618867170555e-06, "loss": 0.2044, "step": 2595 }, { "epoch": 0.71, "grad_norm": 2.6501150126803514, "learning_rate": 8.925706362579097e-06, "loss": 0.2396, "step": 2596 }, { "epoch": 0.71, "grad_norm": 2.3008470660473077, "learning_rate": 8.924793516963881e-06, "loss": 0.1902, "step": 2597 }, { "epoch": 0.71, "grad_norm": 2.525054516359642, "learning_rate": 8.923880330404213e-06, "loss": 0.1973, "step": 2598 }, { "epoch": 0.71, "grad_norm": 1.985375270909967, "learning_rate": 8.922966802979419e-06, "loss": 0.1781, "step": 2599 }, { "epoch": 0.71, "grad_norm": 2.3014604694785024, "learning_rate": 8.922052934768853e-06, "loss": 0.1848, "step": 2600 }, { "epoch": 0.71, "grad_norm": 2.3950569056470057, "learning_rate": 8.921138725851905e-06, "loss": 0.1597, "step": 2601 }, { "epoch": 0.71, "grad_norm": 3.3100562560786737, "learning_rate": 8.920224176307994e-06, "loss": 0.1987, "step": 2602 }, { "epoch": 0.71, "grad_norm": 2.6765741555939098, "learning_rate": 8.919309286216564e-06, "loss": 0.2271, "step": 2603 }, { "epoch": 0.71, "grad_norm": 2.4074240365292305, "learning_rate": 8.918394055657091e-06, "loss": 0.2097, "step": 2604 }, { "epoch": 0.71, "grad_norm": 2.8491943599881737, "learning_rate": 8.917478484709078e-06, "loss": 0.2319, "step": 2605 }, { "epoch": 0.71, "grad_norm": 3.074805592357438, "learning_rate": 8.916562573452066e-06, "loss": 0.201, "step": 2606 }, { "epoch": 0.71, "grad_norm": 2.349371380185813, "learning_rate": 8.915646321965615e-06, "loss": 0.2053, "step": 2607 }, { "epoch": 0.71, "grad_norm": 2.244421993615951, "learning_rate": 8.914729730329321e-06, "loss": 0.1849, "step": 2608 }, { "epoch": 0.71, "grad_norm": 2.707783615326083, "learning_rate": 8.913812798622806e-06, "loss": 0.2418, "step": 2609 }, { "epoch": 0.71, "grad_norm": 2.4143826250045017, "learning_rate": 8.912895526925726e-06, "loss": 0.2264, "step": 2610 }, { "epoch": 0.71, "grad_norm": 2.75017070650157, "learning_rate": 8.911977915317763e-06, "loss": 0.212, "step": 2611 }, { "epoch": 0.71, "grad_norm": 2.4221582789612217, "learning_rate": 8.911059963878628e-06, "loss": 0.1865, "step": 2612 }, { "epoch": 0.71, "grad_norm": 2.465579895374271, "learning_rate": 8.910141672688063e-06, "loss": 0.2352, "step": 2613 }, { "epoch": 0.71, "grad_norm": 2.2840294665344527, "learning_rate": 8.90922304182584e-06, "loss": 0.1946, "step": 2614 }, { "epoch": 0.71, "grad_norm": 2.509777791455074, "learning_rate": 8.90830407137176e-06, "loss": 0.2016, "step": 2615 }, { "epoch": 0.71, "grad_norm": 2.304029709912657, "learning_rate": 8.907384761405655e-06, "loss": 0.1898, "step": 2616 }, { "epoch": 0.71, "grad_norm": 2.3494404301626983, "learning_rate": 8.906465112007383e-06, "loss": 0.1888, "step": 2617 }, { "epoch": 0.71, "grad_norm": 2.2915500246689295, "learning_rate": 8.905545123256834e-06, "loss": 0.1903, "step": 2618 }, { "epoch": 0.71, "grad_norm": 2.4862369114630454, "learning_rate": 8.904624795233926e-06, "loss": 0.1719, "step": 2619 }, { "epoch": 0.72, "grad_norm": 2.4562799641917175, "learning_rate": 8.903704128018608e-06, "loss": 0.1868, "step": 2620 }, { "epoch": 0.72, "grad_norm": 2.396967219162005, "learning_rate": 8.90278312169086e-06, "loss": 0.2036, "step": 2621 }, { "epoch": 0.72, "grad_norm": 2.5542097177272853, "learning_rate": 8.901861776330682e-06, "loss": 0.1857, "step": 2622 }, { "epoch": 0.72, "grad_norm": 2.640828208504622, "learning_rate": 8.90094009201812e-06, "loss": 0.2163, "step": 2623 }, { "epoch": 0.72, "grad_norm": 2.9035881114351754, "learning_rate": 8.900018068833233e-06, "loss": 0.2161, "step": 2624 }, { "epoch": 0.72, "grad_norm": 2.1263250135172362, "learning_rate": 8.899095706856122e-06, "loss": 0.172, "step": 2625 }, { "epoch": 0.72, "grad_norm": 2.4189926210944415, "learning_rate": 8.89817300616691e-06, "loss": 0.2049, "step": 2626 }, { "epoch": 0.72, "grad_norm": 2.5532193452171397, "learning_rate": 8.897249966845748e-06, "loss": 0.1889, "step": 2627 }, { "epoch": 0.72, "grad_norm": 2.272106966068934, "learning_rate": 8.896326588972826e-06, "loss": 0.1815, "step": 2628 }, { "epoch": 0.72, "grad_norm": 2.3596544155483374, "learning_rate": 8.895402872628352e-06, "loss": 0.1928, "step": 2629 }, { "epoch": 0.72, "grad_norm": 2.4576055375391777, "learning_rate": 8.894478817892574e-06, "loss": 0.1976, "step": 2630 }, { "epoch": 0.72, "grad_norm": 2.3334390278615667, "learning_rate": 8.893554424845758e-06, "loss": 0.1906, "step": 2631 }, { "epoch": 0.72, "grad_norm": 2.3957573839948045, "learning_rate": 8.892629693568209e-06, "loss": 0.2194, "step": 2632 }, { "epoch": 0.72, "grad_norm": 2.593040381054902, "learning_rate": 8.891704624140257e-06, "loss": 0.2009, "step": 2633 }, { "epoch": 0.72, "grad_norm": 2.366141529724085, "learning_rate": 8.890779216642263e-06, "loss": 0.1875, "step": 2634 }, { "epoch": 0.72, "grad_norm": 2.0176872815599713, "learning_rate": 8.889853471154615e-06, "loss": 0.1905, "step": 2635 }, { "epoch": 0.72, "grad_norm": 2.2938314093045014, "learning_rate": 8.888927387757735e-06, "loss": 0.1832, "step": 2636 }, { "epoch": 0.72, "grad_norm": 2.5337541768176055, "learning_rate": 8.88800096653207e-06, "loss": 0.1771, "step": 2637 }, { "epoch": 0.72, "grad_norm": 2.5706883467893515, "learning_rate": 8.887074207558092e-06, "loss": 0.1959, "step": 2638 }, { "epoch": 0.72, "grad_norm": 2.613025290018213, "learning_rate": 8.886147110916316e-06, "loss": 0.2099, "step": 2639 }, { "epoch": 0.72, "grad_norm": 2.5296313291740886, "learning_rate": 8.885219676687277e-06, "loss": 0.1964, "step": 2640 }, { "epoch": 0.72, "grad_norm": 2.6274165654596917, "learning_rate": 8.884291904951538e-06, "loss": 0.1955, "step": 2641 }, { "epoch": 0.72, "grad_norm": 2.322922156189818, "learning_rate": 8.883363795789694e-06, "loss": 0.1902, "step": 2642 }, { "epoch": 0.72, "grad_norm": 2.7197900251218736, "learning_rate": 8.882435349282371e-06, "loss": 0.1966, "step": 2643 }, { "epoch": 0.72, "grad_norm": 2.5146984303852613, "learning_rate": 8.88150656551022e-06, "loss": 0.2029, "step": 2644 }, { "epoch": 0.72, "grad_norm": 2.471498318081182, "learning_rate": 8.880577444553929e-06, "loss": 0.1632, "step": 2645 }, { "epoch": 0.72, "grad_norm": 2.382739921323251, "learning_rate": 8.879647986494205e-06, "loss": 0.1877, "step": 2646 }, { "epoch": 0.72, "grad_norm": 2.353491698934403, "learning_rate": 8.878718191411792e-06, "loss": 0.1942, "step": 2647 }, { "epoch": 0.72, "grad_norm": 2.0638506623885307, "learning_rate": 8.87778805938746e-06, "loss": 0.1796, "step": 2648 }, { "epoch": 0.72, "grad_norm": 2.398036634202962, "learning_rate": 8.876857590502008e-06, "loss": 0.2193, "step": 2649 }, { "epoch": 0.72, "grad_norm": 2.016808796523212, "learning_rate": 8.875926784836267e-06, "loss": 0.1839, "step": 2650 }, { "epoch": 0.72, "grad_norm": 2.372394166875836, "learning_rate": 8.874995642471094e-06, "loss": 0.2073, "step": 2651 }, { "epoch": 0.72, "grad_norm": 2.64394057706979, "learning_rate": 8.87406416348738e-06, "loss": 0.2165, "step": 2652 }, { "epoch": 0.72, "grad_norm": 2.2874222473065564, "learning_rate": 8.873132347966038e-06, "loss": 0.169, "step": 2653 }, { "epoch": 0.72, "grad_norm": 2.5881586221816844, "learning_rate": 8.872200195988016e-06, "loss": 0.2303, "step": 2654 }, { "epoch": 0.72, "grad_norm": 2.360985619004908, "learning_rate": 8.87126770763429e-06, "loss": 0.2072, "step": 2655 }, { "epoch": 0.73, "grad_norm": 2.379383349603289, "learning_rate": 8.870334882985866e-06, "loss": 0.2116, "step": 2656 }, { "epoch": 0.73, "grad_norm": 2.3236815237475192, "learning_rate": 8.869401722123771e-06, "loss": 0.1735, "step": 2657 }, { "epoch": 0.73, "grad_norm": 2.381407411566154, "learning_rate": 8.868468225129078e-06, "loss": 0.1991, "step": 2658 }, { "epoch": 0.73, "grad_norm": 2.5332292689228026, "learning_rate": 8.867534392082873e-06, "loss": 0.2303, "step": 2659 }, { "epoch": 0.73, "grad_norm": 3.0766636850425058, "learning_rate": 8.866600223066277e-06, "loss": 0.212, "step": 2660 }, { "epoch": 0.73, "grad_norm": 2.230793299068313, "learning_rate": 8.865665718160445e-06, "loss": 0.1952, "step": 2661 }, { "epoch": 0.73, "grad_norm": 2.28309000536618, "learning_rate": 8.864730877446555e-06, "loss": 0.1981, "step": 2662 }, { "epoch": 0.73, "grad_norm": 2.275711567135502, "learning_rate": 8.863795701005813e-06, "loss": 0.1946, "step": 2663 }, { "epoch": 0.73, "grad_norm": 2.28430198968044, "learning_rate": 8.862860188919462e-06, "loss": 0.2033, "step": 2664 }, { "epoch": 0.73, "grad_norm": 2.368544603110747, "learning_rate": 8.861924341268768e-06, "loss": 0.2011, "step": 2665 }, { "epoch": 0.73, "grad_norm": 2.4294646823414103, "learning_rate": 8.860988158135025e-06, "loss": 0.1739, "step": 2666 }, { "epoch": 0.73, "grad_norm": 2.221545826217993, "learning_rate": 8.86005163959956e-06, "loss": 0.168, "step": 2667 }, { "epoch": 0.73, "grad_norm": 2.1390555800485322, "learning_rate": 8.85911478574373e-06, "loss": 0.1711, "step": 2668 }, { "epoch": 0.73, "grad_norm": 2.772727096983433, "learning_rate": 8.858177596648915e-06, "loss": 0.26, "step": 2669 }, { "epoch": 0.73, "grad_norm": 2.558961118855876, "learning_rate": 8.857240072396533e-06, "loss": 0.1786, "step": 2670 }, { "epoch": 0.73, "grad_norm": 2.386775291108183, "learning_rate": 8.856302213068022e-06, "loss": 0.2214, "step": 2671 }, { "epoch": 0.73, "grad_norm": 2.3310011330571294, "learning_rate": 8.855364018744854e-06, "loss": 0.1684, "step": 2672 }, { "epoch": 0.73, "grad_norm": 2.273939990036212, "learning_rate": 8.85442548950853e-06, "loss": 0.1911, "step": 2673 }, { "epoch": 0.73, "grad_norm": 2.4815458541950965, "learning_rate": 8.853486625440581e-06, "loss": 0.2182, "step": 2674 }, { "epoch": 0.73, "grad_norm": 2.0978484759341267, "learning_rate": 8.852547426622563e-06, "loss": 0.1582, "step": 2675 }, { "epoch": 0.73, "grad_norm": 2.169755746431044, "learning_rate": 8.851607893136065e-06, "loss": 0.1461, "step": 2676 }, { "epoch": 0.73, "grad_norm": 2.444823710297576, "learning_rate": 8.850668025062704e-06, "loss": 0.2153, "step": 2677 }, { "epoch": 0.73, "grad_norm": 2.349062825390895, "learning_rate": 8.849727822484125e-06, "loss": 0.1737, "step": 2678 }, { "epoch": 0.73, "grad_norm": 2.334075504860098, "learning_rate": 8.848787285482003e-06, "loss": 0.1639, "step": 2679 }, { "epoch": 0.73, "grad_norm": 3.2785551599374436, "learning_rate": 8.847846414138041e-06, "loss": 0.1872, "step": 2680 }, { "epoch": 0.73, "grad_norm": 2.353327556189428, "learning_rate": 8.846905208533974e-06, "loss": 0.2066, "step": 2681 }, { "epoch": 0.73, "grad_norm": 2.7052969734121, "learning_rate": 8.84596366875156e-06, "loss": 0.2187, "step": 2682 }, { "epoch": 0.73, "grad_norm": 2.4316913495022687, "learning_rate": 8.845021794872597e-06, "loss": 0.1951, "step": 2683 }, { "epoch": 0.73, "grad_norm": 2.530526962112897, "learning_rate": 8.844079586978897e-06, "loss": 0.1605, "step": 2684 }, { "epoch": 0.73, "grad_norm": 2.025354715696344, "learning_rate": 8.843137045152314e-06, "loss": 0.1467, "step": 2685 }, { "epoch": 0.73, "grad_norm": 2.5048048944435073, "learning_rate": 8.842194169474727e-06, "loss": 0.2003, "step": 2686 }, { "epoch": 0.73, "grad_norm": 2.055699239640041, "learning_rate": 8.84125096002804e-06, "loss": 0.1944, "step": 2687 }, { "epoch": 0.73, "grad_norm": 2.508366549060347, "learning_rate": 8.840307416894189e-06, "loss": 0.2225, "step": 2688 }, { "epoch": 0.73, "grad_norm": 2.1510090795677193, "learning_rate": 8.83936354015514e-06, "loss": 0.1931, "step": 2689 }, { "epoch": 0.73, "grad_norm": 3.007995593537831, "learning_rate": 8.838419329892887e-06, "loss": 0.2572, "step": 2690 }, { "epoch": 0.73, "grad_norm": 2.431869073316424, "learning_rate": 8.837474786189454e-06, "loss": 0.1721, "step": 2691 }, { "epoch": 0.73, "grad_norm": 2.144026062533291, "learning_rate": 8.836529909126891e-06, "loss": 0.1978, "step": 2692 }, { "epoch": 0.74, "grad_norm": 2.4982088085582665, "learning_rate": 8.83558469878728e-06, "loss": 0.2134, "step": 2693 }, { "epoch": 0.74, "grad_norm": 2.40689261859619, "learning_rate": 8.834639155252732e-06, "loss": 0.2002, "step": 2694 }, { "epoch": 0.74, "grad_norm": 2.3185364909248567, "learning_rate": 8.833693278605381e-06, "loss": 0.1995, "step": 2695 }, { "epoch": 0.74, "grad_norm": 2.3452714908893015, "learning_rate": 8.832747068927404e-06, "loss": 0.1777, "step": 2696 }, { "epoch": 0.74, "grad_norm": 2.4296731297586613, "learning_rate": 8.831800526300987e-06, "loss": 0.1996, "step": 2697 }, { "epoch": 0.74, "grad_norm": 2.1262934381815453, "learning_rate": 8.830853650808361e-06, "loss": 0.172, "step": 2698 }, { "epoch": 0.74, "grad_norm": 2.387946308830362, "learning_rate": 8.829906442531782e-06, "loss": 0.1819, "step": 2699 }, { "epoch": 0.74, "grad_norm": 2.5480912583075277, "learning_rate": 8.828958901553529e-06, "loss": 0.1925, "step": 2700 }, { "epoch": 0.74, "grad_norm": 2.2675911521147873, "learning_rate": 8.828011027955918e-06, "loss": 0.1949, "step": 2701 }, { "epoch": 0.74, "grad_norm": 2.235832415913824, "learning_rate": 8.82706282182129e-06, "loss": 0.1804, "step": 2702 }, { "epoch": 0.74, "grad_norm": 2.405518129795668, "learning_rate": 8.826114283232012e-06, "loss": 0.1918, "step": 2703 }, { "epoch": 0.74, "grad_norm": 3.1489028022114343, "learning_rate": 8.825165412270487e-06, "loss": 0.183, "step": 2704 }, { "epoch": 0.74, "grad_norm": 2.4898813225517396, "learning_rate": 8.824216209019139e-06, "loss": 0.2136, "step": 2705 }, { "epoch": 0.74, "grad_norm": 2.4290380574607413, "learning_rate": 8.823266673560426e-06, "loss": 0.1997, "step": 2706 }, { "epoch": 0.74, "grad_norm": 2.0547336409764037, "learning_rate": 8.822316805976836e-06, "loss": 0.1964, "step": 2707 }, { "epoch": 0.74, "grad_norm": 2.4995177184981383, "learning_rate": 8.821366606350882e-06, "loss": 0.204, "step": 2708 }, { "epoch": 0.74, "grad_norm": 2.496877932327667, "learning_rate": 8.820416074765106e-06, "loss": 0.1846, "step": 2709 }, { "epoch": 0.74, "grad_norm": 2.811538995486479, "learning_rate": 8.819465211302081e-06, "loss": 0.2538, "step": 2710 }, { "epoch": 0.74, "grad_norm": 2.3483936457928305, "learning_rate": 8.818514016044405e-06, "loss": 0.2007, "step": 2711 }, { "epoch": 0.74, "grad_norm": 2.459001267028272, "learning_rate": 8.817562489074714e-06, "loss": 0.1892, "step": 2712 }, { "epoch": 0.74, "grad_norm": 2.330527072438074, "learning_rate": 8.816610630475664e-06, "loss": 0.1985, "step": 2713 }, { "epoch": 0.74, "grad_norm": 1.927556743327112, "learning_rate": 8.81565844032994e-06, "loss": 0.1527, "step": 2714 }, { "epoch": 0.74, "grad_norm": 2.5477202344241388, "learning_rate": 8.814705918720259e-06, "loss": 0.205, "step": 2715 }, { "epoch": 0.74, "grad_norm": 2.4638115812631862, "learning_rate": 8.813753065729369e-06, "loss": 0.1914, "step": 2716 }, { "epoch": 0.74, "grad_norm": 2.1382782141388263, "learning_rate": 8.812799881440039e-06, "loss": 0.1869, "step": 2717 }, { "epoch": 0.74, "grad_norm": 2.2137579397035516, "learning_rate": 8.811846365935076e-06, "loss": 0.1826, "step": 2718 }, { "epoch": 0.74, "grad_norm": 2.3893327699204407, "learning_rate": 8.810892519297308e-06, "loss": 0.1719, "step": 2719 }, { "epoch": 0.74, "grad_norm": 2.2884005897257405, "learning_rate": 8.809938341609596e-06, "loss": 0.17, "step": 2720 }, { "epoch": 0.74, "grad_norm": 2.7329048226931616, "learning_rate": 8.808983832954831e-06, "loss": 0.256, "step": 2721 }, { "epoch": 0.74, "grad_norm": 2.6693921189089678, "learning_rate": 8.808028993415929e-06, "loss": 0.206, "step": 2722 }, { "epoch": 0.74, "grad_norm": 2.4346854883180216, "learning_rate": 8.807073823075835e-06, "loss": 0.1957, "step": 2723 }, { "epoch": 0.74, "grad_norm": 2.129420663344039, "learning_rate": 8.806118322017525e-06, "loss": 0.1711, "step": 2724 }, { "epoch": 0.74, "grad_norm": 2.0543657210581103, "learning_rate": 8.805162490324005e-06, "loss": 0.1643, "step": 2725 }, { "epoch": 0.74, "grad_norm": 2.2182602963716076, "learning_rate": 8.804206328078304e-06, "loss": 0.2095, "step": 2726 }, { "epoch": 0.74, "grad_norm": 2.4027833208632905, "learning_rate": 8.803249835363486e-06, "loss": 0.2085, "step": 2727 }, { "epoch": 0.74, "grad_norm": 2.16120960638039, "learning_rate": 8.80229301226264e-06, "loss": 0.1855, "step": 2728 }, { "epoch": 0.75, "grad_norm": 2.108758597089169, "learning_rate": 8.801335858858883e-06, "loss": 0.1567, "step": 2729 }, { "epoch": 0.75, "grad_norm": 2.2403412709481745, "learning_rate": 8.800378375235365e-06, "loss": 0.1895, "step": 2730 }, { "epoch": 0.75, "grad_norm": 2.338011618501962, "learning_rate": 8.79942056147526e-06, "loss": 0.1889, "step": 2731 }, { "epoch": 0.75, "grad_norm": 2.3050329311885926, "learning_rate": 8.798462417661775e-06, "loss": 0.1801, "step": 2732 }, { "epoch": 0.75, "grad_norm": 2.309140102025805, "learning_rate": 8.79750394387814e-06, "loss": 0.1691, "step": 2733 }, { "epoch": 0.75, "grad_norm": 2.3946601334039133, "learning_rate": 8.796545140207622e-06, "loss": 0.1955, "step": 2734 }, { "epoch": 0.75, "grad_norm": 1.9992360363500568, "learning_rate": 8.795586006733505e-06, "loss": 0.1381, "step": 2735 }, { "epoch": 0.75, "grad_norm": 2.309804412797823, "learning_rate": 8.794626543539114e-06, "loss": 0.1834, "step": 2736 }, { "epoch": 0.75, "grad_norm": 2.157161474511113, "learning_rate": 8.793666750707795e-06, "loss": 0.1843, "step": 2737 }, { "epoch": 0.75, "grad_norm": 2.4118610090961554, "learning_rate": 8.792706628322924e-06, "loss": 0.2072, "step": 2738 }, { "epoch": 0.75, "grad_norm": 2.153840754720639, "learning_rate": 8.791746176467908e-06, "loss": 0.1753, "step": 2739 }, { "epoch": 0.75, "grad_norm": 2.2432703281122897, "learning_rate": 8.79078539522618e-06, "loss": 0.2047, "step": 2740 }, { "epoch": 0.75, "grad_norm": 2.199319382192434, "learning_rate": 8.789824284681201e-06, "loss": 0.1813, "step": 2741 }, { "epoch": 0.75, "grad_norm": 2.4394308727285465, "learning_rate": 8.788862844916464e-06, "loss": 0.1931, "step": 2742 }, { "epoch": 0.75, "grad_norm": 2.3116635056711408, "learning_rate": 8.787901076015487e-06, "loss": 0.1791, "step": 2743 }, { "epoch": 0.75, "grad_norm": 2.078360072119509, "learning_rate": 8.78693897806182e-06, "loss": 0.1734, "step": 2744 }, { "epoch": 0.75, "grad_norm": 2.6638077309788724, "learning_rate": 8.78597655113904e-06, "loss": 0.207, "step": 2745 }, { "epoch": 0.75, "grad_norm": 2.381023662094938, "learning_rate": 8.78501379533075e-06, "loss": 0.2085, "step": 2746 }, { "epoch": 0.75, "grad_norm": 2.1840748034821784, "learning_rate": 8.784050710720587e-06, "loss": 0.1615, "step": 2747 }, { "epoch": 0.75, "grad_norm": 2.2008103336316793, "learning_rate": 8.783087297392212e-06, "loss": 0.1742, "step": 2748 }, { "epoch": 0.75, "grad_norm": 2.232934320641409, "learning_rate": 8.782123555429315e-06, "loss": 0.1971, "step": 2749 }, { "epoch": 0.75, "grad_norm": 2.375914138269291, "learning_rate": 8.78115948491562e-06, "loss": 0.2037, "step": 2750 }, { "epoch": 0.75, "grad_norm": 2.345778592442171, "learning_rate": 8.780195085934871e-06, "loss": 0.1819, "step": 2751 }, { "epoch": 0.75, "grad_norm": 2.7953484726190094, "learning_rate": 8.779230358570845e-06, "loss": 0.2017, "step": 2752 }, { "epoch": 0.75, "grad_norm": 2.5488411936839976, "learning_rate": 8.77826530290735e-06, "loss": 0.2177, "step": 2753 }, { "epoch": 0.75, "grad_norm": 2.45947770505814, "learning_rate": 8.777299919028217e-06, "loss": 0.1952, "step": 2754 }, { "epoch": 0.75, "grad_norm": 2.6336880197170376, "learning_rate": 8.77633420701731e-06, "loss": 0.2218, "step": 2755 }, { "epoch": 0.75, "grad_norm": 2.282826229800347, "learning_rate": 8.775368166958518e-06, "loss": 0.1649, "step": 2756 }, { "epoch": 0.75, "grad_norm": 2.3111710868660253, "learning_rate": 8.774401798935763e-06, "loss": 0.209, "step": 2757 }, { "epoch": 0.75, "grad_norm": 2.4752426681147495, "learning_rate": 8.773435103032992e-06, "loss": 0.1833, "step": 2758 }, { "epoch": 0.75, "grad_norm": 2.2713680512389063, "learning_rate": 8.77246807933418e-06, "loss": 0.1962, "step": 2759 }, { "epoch": 0.75, "grad_norm": 2.117424035034632, "learning_rate": 8.771500727923332e-06, "loss": 0.1544, "step": 2760 }, { "epoch": 0.75, "grad_norm": 2.4703833500846306, "learning_rate": 8.770533048884483e-06, "loss": 0.1948, "step": 2761 }, { "epoch": 0.75, "grad_norm": 2.163878655433132, "learning_rate": 8.769565042301692e-06, "loss": 0.1916, "step": 2762 }, { "epoch": 0.75, "grad_norm": 2.4381167007851654, "learning_rate": 8.768596708259052e-06, "loss": 0.2201, "step": 2763 }, { "epoch": 0.75, "grad_norm": 2.2743277856091626, "learning_rate": 8.767628046840677e-06, "loss": 0.161, "step": 2764 }, { "epoch": 0.75, "grad_norm": 2.5351153775040127, "learning_rate": 8.766659058130719e-06, "loss": 0.2141, "step": 2765 }, { "epoch": 0.76, "grad_norm": 2.4758388521291677, "learning_rate": 8.765689742213353e-06, "loss": 0.1988, "step": 2766 }, { "epoch": 0.76, "grad_norm": 2.5028856392124017, "learning_rate": 8.764720099172781e-06, "loss": 0.2122, "step": 2767 }, { "epoch": 0.76, "grad_norm": 3.0529987408669244, "learning_rate": 8.763750129093236e-06, "loss": 0.2422, "step": 2768 }, { "epoch": 0.76, "grad_norm": 2.500728904048456, "learning_rate": 8.762779832058978e-06, "loss": 0.1867, "step": 2769 }, { "epoch": 0.76, "grad_norm": 2.355871069715481, "learning_rate": 8.761809208154297e-06, "loss": 0.1948, "step": 2770 }, { "epoch": 0.76, "grad_norm": 2.586134685319701, "learning_rate": 8.760838257463511e-06, "loss": 0.2041, "step": 2771 }, { "epoch": 0.76, "grad_norm": 2.099813190305629, "learning_rate": 8.759866980070963e-06, "loss": 0.1795, "step": 2772 }, { "epoch": 0.76, "grad_norm": 2.399160429165581, "learning_rate": 8.758895376061032e-06, "loss": 0.1882, "step": 2773 }, { "epoch": 0.76, "grad_norm": 2.220740916464358, "learning_rate": 8.757923445518116e-06, "loss": 0.1793, "step": 2774 }, { "epoch": 0.76, "grad_norm": 2.456157289890256, "learning_rate": 8.75695118852665e-06, "loss": 0.2157, "step": 2775 }, { "epoch": 0.76, "grad_norm": 2.523721127510485, "learning_rate": 8.755978605171089e-06, "loss": 0.1935, "step": 2776 }, { "epoch": 0.76, "grad_norm": 2.47234853171708, "learning_rate": 8.755005695535925e-06, "loss": 0.1925, "step": 2777 }, { "epoch": 0.76, "grad_norm": 2.1301879411098334, "learning_rate": 8.754032459705672e-06, "loss": 0.1751, "step": 2778 }, { "epoch": 0.76, "grad_norm": 2.4418392813033867, "learning_rate": 8.753058897764874e-06, "loss": 0.1967, "step": 2779 }, { "epoch": 0.76, "grad_norm": 2.6905797584173454, "learning_rate": 8.752085009798106e-06, "loss": 0.1807, "step": 2780 }, { "epoch": 0.76, "grad_norm": 2.494328923529313, "learning_rate": 8.751110795889966e-06, "loss": 0.1697, "step": 2781 }, { "epoch": 0.76, "grad_norm": 2.3040684749027185, "learning_rate": 8.750136256125085e-06, "loss": 0.172, "step": 2782 }, { "epoch": 0.76, "grad_norm": 2.4668805819280553, "learning_rate": 8.749161390588121e-06, "loss": 0.1678, "step": 2783 }, { "epoch": 0.76, "grad_norm": 4.836425295505188, "learning_rate": 8.74818619936376e-06, "loss": 0.2205, "step": 2784 }, { "epoch": 0.76, "grad_norm": 2.550242630675199, "learning_rate": 8.747210682536715e-06, "loss": 0.1905, "step": 2785 }, { "epoch": 0.76, "grad_norm": 2.449602648461097, "learning_rate": 8.746234840191729e-06, "loss": 0.2055, "step": 2786 }, { "epoch": 0.76, "grad_norm": 2.5010618604504757, "learning_rate": 8.745258672413574e-06, "loss": 0.1836, "step": 2787 }, { "epoch": 0.76, "grad_norm": 2.474184030193847, "learning_rate": 8.744282179287049e-06, "loss": 0.237, "step": 2788 }, { "epoch": 0.76, "grad_norm": 2.1031667080320315, "learning_rate": 8.743305360896978e-06, "loss": 0.1334, "step": 2789 }, { "epoch": 0.76, "grad_norm": 2.565738634157848, "learning_rate": 8.742328217328221e-06, "loss": 0.1951, "step": 2790 }, { "epoch": 0.76, "grad_norm": 2.7330100746431305, "learning_rate": 8.741350748665662e-06, "loss": 0.202, "step": 2791 }, { "epoch": 0.76, "grad_norm": 2.30154469479652, "learning_rate": 8.74037295499421e-06, "loss": 0.2157, "step": 2792 }, { "epoch": 0.76, "grad_norm": 2.189408271568324, "learning_rate": 8.739394836398806e-06, "loss": 0.1714, "step": 2793 }, { "epoch": 0.76, "grad_norm": 2.224480666727112, "learning_rate": 8.73841639296442e-06, "loss": 0.1877, "step": 2794 }, { "epoch": 0.76, "grad_norm": 2.2626532348442527, "learning_rate": 8.737437624776047e-06, "loss": 0.1926, "step": 2795 }, { "epoch": 0.76, "grad_norm": 2.628893566052792, "learning_rate": 8.736458531918714e-06, "loss": 0.218, "step": 2796 }, { "epoch": 0.76, "grad_norm": 2.2522327405947307, "learning_rate": 8.735479114477472e-06, "loss": 0.1874, "step": 2797 }, { "epoch": 0.76, "grad_norm": 2.591948674583733, "learning_rate": 8.734499372537406e-06, "loss": 0.1992, "step": 2798 }, { "epoch": 0.76, "grad_norm": 2.51752308255524, "learning_rate": 8.73351930618362e-06, "loss": 0.2157, "step": 2799 }, { "epoch": 0.76, "grad_norm": 2.3201063266190713, "learning_rate": 8.732538915501257e-06, "loss": 0.1637, "step": 2800 }, { "epoch": 0.76, "grad_norm": 2.4899502973650116, "learning_rate": 8.73155820057548e-06, "loss": 0.2145, "step": 2801 }, { "epoch": 0.76, "grad_norm": 2.129743947493152, "learning_rate": 8.730577161491486e-06, "loss": 0.1636, "step": 2802 }, { "epoch": 0.77, "grad_norm": 2.1384861431605815, "learning_rate": 8.729595798334494e-06, "loss": 0.1534, "step": 2803 }, { "epoch": 0.77, "grad_norm": 2.7007994397624007, "learning_rate": 8.728614111189756e-06, "loss": 0.2017, "step": 2804 }, { "epoch": 0.77, "grad_norm": 2.780240542908485, "learning_rate": 8.72763210014255e-06, "loss": 0.2, "step": 2805 }, { "epoch": 0.77, "grad_norm": 3.1232296947977547, "learning_rate": 8.726649765278184e-06, "loss": 0.1977, "step": 2806 }, { "epoch": 0.77, "grad_norm": 2.4397195719291567, "learning_rate": 8.72566710668199e-06, "loss": 0.201, "step": 2807 }, { "epoch": 0.77, "grad_norm": 2.6438181958017797, "learning_rate": 8.724684124439336e-06, "loss": 0.2249, "step": 2808 }, { "epoch": 0.77, "grad_norm": 2.3711227058858615, "learning_rate": 8.723700818635608e-06, "loss": 0.1919, "step": 2809 }, { "epoch": 0.77, "grad_norm": 2.180779817131397, "learning_rate": 8.722717189356226e-06, "loss": 0.1603, "step": 2810 }, { "epoch": 0.77, "grad_norm": 2.49436174495027, "learning_rate": 8.72173323668664e-06, "loss": 0.2141, "step": 2811 }, { "epoch": 0.77, "grad_norm": 2.2868530248417716, "learning_rate": 8.720748960712323e-06, "loss": 0.1736, "step": 2812 }, { "epoch": 0.77, "grad_norm": 2.383150595717509, "learning_rate": 8.71976436151878e-06, "loss": 0.1723, "step": 2813 }, { "epoch": 0.77, "grad_norm": 2.119588970630532, "learning_rate": 8.718779439191543e-06, "loss": 0.1602, "step": 2814 }, { "epoch": 0.77, "grad_norm": 2.3213459267634593, "learning_rate": 8.717794193816166e-06, "loss": 0.1608, "step": 2815 }, { "epoch": 0.77, "grad_norm": 2.4550755801244257, "learning_rate": 8.716808625478245e-06, "loss": 0.1963, "step": 2816 }, { "epoch": 0.77, "grad_norm": 2.4747411684013088, "learning_rate": 8.715822734263391e-06, "loss": 0.1783, "step": 2817 }, { "epoch": 0.77, "grad_norm": 2.572036209057203, "learning_rate": 8.714836520257248e-06, "loss": 0.2092, "step": 2818 }, { "epoch": 0.77, "grad_norm": 2.7284523647952152, "learning_rate": 8.71384998354549e-06, "loss": 0.2255, "step": 2819 }, { "epoch": 0.77, "grad_norm": 2.4605970978778173, "learning_rate": 8.712863124213814e-06, "loss": 0.1908, "step": 2820 }, { "epoch": 0.77, "grad_norm": 2.4768126847342353, "learning_rate": 8.711875942347949e-06, "loss": 0.2375, "step": 2821 }, { "epoch": 0.77, "grad_norm": 2.4665160610714607, "learning_rate": 8.710888438033651e-06, "loss": 0.2214, "step": 2822 }, { "epoch": 0.77, "grad_norm": 2.282577791392892, "learning_rate": 8.709900611356703e-06, "loss": 0.162, "step": 2823 }, { "epoch": 0.77, "grad_norm": 2.026581680861299, "learning_rate": 8.708912462402921e-06, "loss": 0.1699, "step": 2824 }, { "epoch": 0.77, "grad_norm": 2.4428791122302482, "learning_rate": 8.70792399125814e-06, "loss": 0.1952, "step": 2825 }, { "epoch": 0.77, "grad_norm": 2.3454189423426235, "learning_rate": 8.706935198008228e-06, "loss": 0.1852, "step": 2826 }, { "epoch": 0.77, "grad_norm": 2.89607961041875, "learning_rate": 8.705946082739085e-06, "loss": 0.2184, "step": 2827 }, { "epoch": 0.77, "grad_norm": 2.4226062878841166, "learning_rate": 8.70495664553663e-06, "loss": 0.1992, "step": 2828 }, { "epoch": 0.77, "grad_norm": 2.573541371576731, "learning_rate": 8.703966886486819e-06, "loss": 0.2227, "step": 2829 }, { "epoch": 0.77, "grad_norm": 2.4774018351164053, "learning_rate": 8.702976805675629e-06, "loss": 0.2, "step": 2830 }, { "epoch": 0.77, "grad_norm": 2.0534267382939277, "learning_rate": 8.70198640318907e-06, "loss": 0.1667, "step": 2831 }, { "epoch": 0.77, "grad_norm": 2.5087219925522373, "learning_rate": 8.700995679113175e-06, "loss": 0.2074, "step": 2832 }, { "epoch": 0.77, "grad_norm": 2.9257746165405534, "learning_rate": 8.70000463353401e-06, "loss": 0.1813, "step": 2833 }, { "epoch": 0.77, "grad_norm": 2.273680968541367, "learning_rate": 8.699013266537663e-06, "loss": 0.185, "step": 2834 }, { "epoch": 0.77, "grad_norm": 2.3327119556671003, "learning_rate": 8.698021578210258e-06, "loss": 0.2063, "step": 2835 }, { "epoch": 0.77, "grad_norm": 2.1195830299768024, "learning_rate": 8.697029568637942e-06, "loss": 0.1717, "step": 2836 }, { "epoch": 0.77, "grad_norm": 2.1554648168964543, "learning_rate": 8.696037237906887e-06, "loss": 0.1708, "step": 2837 }, { "epoch": 0.77, "grad_norm": 2.193543189385725, "learning_rate": 8.695044586103297e-06, "loss": 0.1869, "step": 2838 }, { "epoch": 0.78, "grad_norm": 2.330837839686286, "learning_rate": 8.694051613313404e-06, "loss": 0.1728, "step": 2839 }, { "epoch": 0.78, "grad_norm": 2.5632516650720967, "learning_rate": 8.693058319623466e-06, "loss": 0.2162, "step": 2840 }, { "epoch": 0.78, "grad_norm": 2.3568423874980344, "learning_rate": 8.692064705119773e-06, "loss": 0.1773, "step": 2841 }, { "epoch": 0.78, "grad_norm": 2.2891638023206524, "learning_rate": 8.691070769888637e-06, "loss": 0.176, "step": 2842 }, { "epoch": 0.78, "grad_norm": 2.484993679095583, "learning_rate": 8.690076514016399e-06, "loss": 0.1897, "step": 2843 }, { "epoch": 0.78, "grad_norm": 2.4749565428766696, "learning_rate": 8.689081937589432e-06, "loss": 0.2004, "step": 2844 }, { "epoch": 0.78, "grad_norm": 2.6288869732455398, "learning_rate": 8.688087040694133e-06, "loss": 0.1926, "step": 2845 }, { "epoch": 0.78, "grad_norm": 2.1442859947017343, "learning_rate": 8.68709182341693e-06, "loss": 0.1631, "step": 2846 }, { "epoch": 0.78, "grad_norm": 2.3355028331418413, "learning_rate": 8.686096285844274e-06, "loss": 0.1781, "step": 2847 }, { "epoch": 0.78, "grad_norm": 2.1889818725678225, "learning_rate": 8.68510042806265e-06, "loss": 0.1887, "step": 2848 }, { "epoch": 0.78, "grad_norm": 2.454479286130754, "learning_rate": 8.684104250158565e-06, "loss": 0.2128, "step": 2849 }, { "epoch": 0.78, "grad_norm": 2.26999384322895, "learning_rate": 8.683107752218557e-06, "loss": 0.158, "step": 2850 }, { "epoch": 0.78, "grad_norm": 2.4794635176905886, "learning_rate": 8.682110934329191e-06, "loss": 0.1707, "step": 2851 }, { "epoch": 0.78, "grad_norm": 2.6289178468210173, "learning_rate": 8.681113796577063e-06, "loss": 0.205, "step": 2852 }, { "epoch": 0.78, "grad_norm": 2.3575225229142247, "learning_rate": 8.680116339048787e-06, "loss": 0.2212, "step": 2853 }, { "epoch": 0.78, "grad_norm": 2.1829393498673357, "learning_rate": 8.679118561831018e-06, "loss": 0.1676, "step": 2854 }, { "epoch": 0.78, "grad_norm": 2.1871362611364096, "learning_rate": 8.678120465010431e-06, "loss": 0.1775, "step": 2855 }, { "epoch": 0.78, "grad_norm": 2.4979950234212405, "learning_rate": 8.677122048673727e-06, "loss": 0.2119, "step": 2856 }, { "epoch": 0.78, "grad_norm": 2.304731113369537, "learning_rate": 8.676123312907641e-06, "loss": 0.1798, "step": 2857 }, { "epoch": 0.78, "grad_norm": 2.3456569745337896, "learning_rate": 8.675124257798933e-06, "loss": 0.1984, "step": 2858 }, { "epoch": 0.78, "grad_norm": 2.419655888175134, "learning_rate": 8.674124883434386e-06, "loss": 0.199, "step": 2859 }, { "epoch": 0.78, "grad_norm": 2.6063851727841976, "learning_rate": 8.67312518990082e-06, "loss": 0.1833, "step": 2860 }, { "epoch": 0.78, "grad_norm": 2.351231335326036, "learning_rate": 8.672125177285073e-06, "loss": 0.1712, "step": 2861 }, { "epoch": 0.78, "grad_norm": 2.5861423461475384, "learning_rate": 8.67112484567402e-06, "loss": 0.2243, "step": 2862 }, { "epoch": 0.78, "grad_norm": 2.277944174305463, "learning_rate": 8.670124195154557e-06, "loss": 0.1825, "step": 2863 }, { "epoch": 0.78, "grad_norm": 2.388511731547195, "learning_rate": 8.669123225813611e-06, "loss": 0.2155, "step": 2864 }, { "epoch": 0.78, "grad_norm": 2.0744506189039975, "learning_rate": 8.668121937738134e-06, "loss": 0.1904, "step": 2865 }, { "epoch": 0.78, "grad_norm": 2.540421638416718, "learning_rate": 8.667120331015107e-06, "loss": 0.2151, "step": 2866 }, { "epoch": 0.78, "grad_norm": 2.1769251559270395, "learning_rate": 8.666118405731542e-06, "loss": 0.2024, "step": 2867 }, { "epoch": 0.78, "grad_norm": 2.2955279155522024, "learning_rate": 8.665116161974473e-06, "loss": 0.1532, "step": 2868 }, { "epoch": 0.78, "grad_norm": 2.252405629246522, "learning_rate": 8.664113599830965e-06, "loss": 0.1902, "step": 2869 }, { "epoch": 0.78, "grad_norm": 2.401060287230373, "learning_rate": 8.66311071938811e-06, "loss": 0.2054, "step": 2870 }, { "epoch": 0.78, "grad_norm": 2.3898162386650443, "learning_rate": 8.662107520733027e-06, "loss": 0.1725, "step": 2871 }, { "epoch": 0.78, "grad_norm": 2.2622532107999294, "learning_rate": 8.661104003952866e-06, "loss": 0.2061, "step": 2872 }, { "epoch": 0.78, "grad_norm": 2.2400789912091432, "learning_rate": 8.660100169134797e-06, "loss": 0.1832, "step": 2873 }, { "epoch": 0.78, "grad_norm": 2.0082931945123224, "learning_rate": 8.659096016366027e-06, "loss": 0.1442, "step": 2874 }, { "epoch": 0.78, "grad_norm": 2.149202520874365, "learning_rate": 8.658091545733785e-06, "loss": 0.1542, "step": 2875 }, { "epoch": 0.79, "grad_norm": 2.4157046924148946, "learning_rate": 8.657086757325328e-06, "loss": 0.2143, "step": 2876 }, { "epoch": 0.79, "grad_norm": 2.4375182585215973, "learning_rate": 8.65608165122794e-06, "loss": 0.1901, "step": 2877 }, { "epoch": 0.79, "grad_norm": 2.6077155006900785, "learning_rate": 8.655076227528937e-06, "loss": 0.2247, "step": 2878 }, { "epoch": 0.79, "grad_norm": 2.44056015973225, "learning_rate": 8.654070486315658e-06, "loss": 0.1822, "step": 2879 }, { "epoch": 0.79, "grad_norm": 2.0831534440376775, "learning_rate": 8.65306442767547e-06, "loss": 0.1228, "step": 2880 }, { "epoch": 0.79, "grad_norm": 2.2978488650552795, "learning_rate": 8.652058051695772e-06, "loss": 0.2031, "step": 2881 }, { "epoch": 0.79, "grad_norm": 2.200378755762137, "learning_rate": 8.651051358463984e-06, "loss": 0.1636, "step": 2882 }, { "epoch": 0.79, "grad_norm": 2.3073567230467473, "learning_rate": 8.650044348067558e-06, "loss": 0.1845, "step": 2883 }, { "epoch": 0.79, "grad_norm": 2.361397506047959, "learning_rate": 8.649037020593974e-06, "loss": 0.2009, "step": 2884 }, { "epoch": 0.79, "grad_norm": 2.513137026791609, "learning_rate": 8.648029376130735e-06, "loss": 0.1934, "step": 2885 }, { "epoch": 0.79, "grad_norm": 2.381683246996296, "learning_rate": 8.647021414765376e-06, "loss": 0.1841, "step": 2886 }, { "epoch": 0.79, "grad_norm": 2.0265282393780493, "learning_rate": 8.646013136585457e-06, "loss": 0.1438, "step": 2887 }, { "epoch": 0.79, "grad_norm": 2.499891823240599, "learning_rate": 8.64500454167857e-06, "loss": 0.1949, "step": 2888 }, { "epoch": 0.79, "grad_norm": 2.424010980934348, "learning_rate": 8.643995630132326e-06, "loss": 0.213, "step": 2889 }, { "epoch": 0.79, "grad_norm": 3.1534888939475394, "learning_rate": 8.642986402034373e-06, "loss": 0.1984, "step": 2890 }, { "epoch": 0.79, "grad_norm": 2.6980642477388956, "learning_rate": 8.641976857472378e-06, "loss": 0.1634, "step": 2891 }, { "epoch": 0.79, "grad_norm": 2.1505693614550183, "learning_rate": 8.640966996534043e-06, "loss": 0.1604, "step": 2892 }, { "epoch": 0.79, "grad_norm": 2.256234672206739, "learning_rate": 8.639956819307092e-06, "loss": 0.1914, "step": 2893 }, { "epoch": 0.79, "grad_norm": 2.0887059862256607, "learning_rate": 8.638946325879278e-06, "loss": 0.1628, "step": 2894 }, { "epoch": 0.79, "grad_norm": 2.3956840289056602, "learning_rate": 8.637935516338384e-06, "loss": 0.192, "step": 2895 }, { "epoch": 0.79, "grad_norm": 2.482079862747106, "learning_rate": 8.636924390772217e-06, "loss": 0.1735, "step": 2896 }, { "epoch": 0.79, "grad_norm": 2.3747609164764185, "learning_rate": 8.635912949268614e-06, "loss": 0.1864, "step": 2897 }, { "epoch": 0.79, "grad_norm": 2.2545481998290158, "learning_rate": 8.634901191915438e-06, "loss": 0.1375, "step": 2898 }, { "epoch": 0.79, "grad_norm": 2.1432853337502666, "learning_rate": 8.633889118800578e-06, "loss": 0.1604, "step": 2899 }, { "epoch": 0.79, "grad_norm": 2.195843722058196, "learning_rate": 8.632876730011955e-06, "loss": 0.1844, "step": 2900 }, { "epoch": 0.79, "grad_norm": 2.2080382931556977, "learning_rate": 8.631864025637511e-06, "loss": 0.1843, "step": 2901 }, { "epoch": 0.79, "grad_norm": 2.5617323967118195, "learning_rate": 8.630851005765223e-06, "loss": 0.2174, "step": 2902 }, { "epoch": 0.79, "grad_norm": 2.330087877657432, "learning_rate": 8.62983767048309e-06, "loss": 0.1574, "step": 2903 }, { "epoch": 0.79, "grad_norm": 2.6095905353837923, "learning_rate": 8.628824019879137e-06, "loss": 0.2094, "step": 2904 }, { "epoch": 0.79, "grad_norm": 3.113043926884788, "learning_rate": 8.627810054041423e-06, "loss": 0.1982, "step": 2905 }, { "epoch": 0.79, "grad_norm": 2.394773063638068, "learning_rate": 8.62679577305803e-06, "loss": 0.1764, "step": 2906 }, { "epoch": 0.79, "grad_norm": 2.237974930921654, "learning_rate": 8.625781177017066e-06, "loss": 0.1884, "step": 2907 }, { "epoch": 0.79, "grad_norm": 2.411786800496304, "learning_rate": 8.62476626600667e-06, "loss": 0.1936, "step": 2908 }, { "epoch": 0.79, "grad_norm": 2.676020172408539, "learning_rate": 8.623751040115007e-06, "loss": 0.1638, "step": 2909 }, { "epoch": 0.79, "grad_norm": 2.2170382092354903, "learning_rate": 8.622735499430267e-06, "loss": 0.182, "step": 2910 }, { "epoch": 0.79, "grad_norm": 2.5105724065421393, "learning_rate": 8.62171964404067e-06, "loss": 0.2039, "step": 2911 }, { "epoch": 0.79, "grad_norm": 2.371694386308627, "learning_rate": 8.620703474034466e-06, "loss": 0.1815, "step": 2912 }, { "epoch": 0.8, "grad_norm": 2.3530216986637034, "learning_rate": 8.619686989499926e-06, "loss": 0.2152, "step": 2913 }, { "epoch": 0.8, "grad_norm": 2.3072036955774147, "learning_rate": 8.61867019052535e-06, "loss": 0.1757, "step": 2914 }, { "epoch": 0.8, "grad_norm": 2.1529166265273343, "learning_rate": 8.617653077199073e-06, "loss": 0.1941, "step": 2915 }, { "epoch": 0.8, "grad_norm": 2.364777274540656, "learning_rate": 8.616635649609443e-06, "loss": 0.1929, "step": 2916 }, { "epoch": 0.8, "grad_norm": 2.2779472042187847, "learning_rate": 8.615617907844848e-06, "loss": 0.1985, "step": 2917 }, { "epoch": 0.8, "grad_norm": 2.214152739006492, "learning_rate": 8.614599851993697e-06, "loss": 0.1833, "step": 2918 }, { "epoch": 0.8, "grad_norm": 2.1385745194723342, "learning_rate": 8.613581482144428e-06, "loss": 0.1666, "step": 2919 }, { "epoch": 0.8, "grad_norm": 1.9988892369314604, "learning_rate": 8.612562798385508e-06, "loss": 0.1582, "step": 2920 }, { "epoch": 0.8, "grad_norm": 2.145132150836671, "learning_rate": 8.61154380080543e-06, "loss": 0.1851, "step": 2921 }, { "epoch": 0.8, "grad_norm": 2.324306245473324, "learning_rate": 8.610524489492709e-06, "loss": 0.2049, "step": 2922 }, { "epoch": 0.8, "grad_norm": 2.2495750454299692, "learning_rate": 8.609504864535896e-06, "loss": 0.1775, "step": 2923 }, { "epoch": 0.8, "grad_norm": 2.3201180801364054, "learning_rate": 8.608484926023564e-06, "loss": 0.1865, "step": 2924 }, { "epoch": 0.8, "grad_norm": 2.192310554152911, "learning_rate": 8.607464674044315e-06, "loss": 0.1771, "step": 2925 }, { "epoch": 0.8, "grad_norm": 2.5280041140581537, "learning_rate": 8.606444108686775e-06, "loss": 0.2193, "step": 2926 }, { "epoch": 0.8, "grad_norm": 2.121601675028039, "learning_rate": 8.605423230039605e-06, "loss": 0.1609, "step": 2927 }, { "epoch": 0.8, "grad_norm": 1.960615936056803, "learning_rate": 8.604402038191483e-06, "loss": 0.1492, "step": 2928 }, { "epoch": 0.8, "grad_norm": 2.234093324508485, "learning_rate": 8.603380533231123e-06, "loss": 0.1824, "step": 2929 }, { "epoch": 0.8, "grad_norm": 2.882866952366496, "learning_rate": 8.60235871524726e-06, "loss": 0.2241, "step": 2930 }, { "epoch": 0.8, "grad_norm": 2.2962682529563767, "learning_rate": 8.601336584328659e-06, "loss": 0.1876, "step": 2931 }, { "epoch": 0.8, "grad_norm": 2.3056479835593757, "learning_rate": 8.600314140564114e-06, "loss": 0.2271, "step": 2932 }, { "epoch": 0.8, "grad_norm": 2.4034838788210604, "learning_rate": 8.599291384042442e-06, "loss": 0.2053, "step": 2933 }, { "epoch": 0.8, "grad_norm": 2.107982531790283, "learning_rate": 8.598268314852492e-06, "loss": 0.202, "step": 2934 }, { "epoch": 0.8, "grad_norm": 2.455988044209978, "learning_rate": 8.597244933083133e-06, "loss": 0.2168, "step": 2935 }, { "epoch": 0.8, "grad_norm": 2.301990901965644, "learning_rate": 8.596221238823269e-06, "loss": 0.1734, "step": 2936 }, { "epoch": 0.8, "grad_norm": 2.0704976389102803, "learning_rate": 8.595197232161824e-06, "loss": 0.1816, "step": 2937 }, { "epoch": 0.8, "grad_norm": 2.1749843939542703, "learning_rate": 8.594172913187759e-06, "loss": 0.1621, "step": 2938 }, { "epoch": 0.8, "grad_norm": 2.3930631179372077, "learning_rate": 8.593148281990052e-06, "loss": 0.1699, "step": 2939 }, { "epoch": 0.8, "grad_norm": 2.3075746391802316, "learning_rate": 8.592123338657713e-06, "loss": 0.1931, "step": 2940 }, { "epoch": 0.8, "grad_norm": 2.2129723600778597, "learning_rate": 8.591098083279774e-06, "loss": 0.1624, "step": 2941 }, { "epoch": 0.8, "grad_norm": 2.0610936259969437, "learning_rate": 8.590072515945305e-06, "loss": 0.1711, "step": 2942 }, { "epoch": 0.8, "grad_norm": 2.74212252456524, "learning_rate": 8.589046636743394e-06, "loss": 0.2182, "step": 2943 }, { "epoch": 0.8, "grad_norm": 2.4806255070332215, "learning_rate": 8.588020445763156e-06, "loss": 0.2078, "step": 2944 }, { "epoch": 0.8, "grad_norm": 2.339736639385108, "learning_rate": 8.58699394309374e-06, "loss": 0.1504, "step": 2945 }, { "epoch": 0.8, "grad_norm": 2.393556450157724, "learning_rate": 8.585967128824313e-06, "loss": 0.1769, "step": 2946 }, { "epoch": 0.8, "grad_norm": 2.2620493563224335, "learning_rate": 8.584940003044078e-06, "loss": 0.1972, "step": 2947 }, { "epoch": 0.8, "grad_norm": 2.5604571786252897, "learning_rate": 8.583912565842258e-06, "loss": 0.2324, "step": 2948 }, { "epoch": 0.81, "grad_norm": 2.2752450658443713, "learning_rate": 8.582884817308106e-06, "loss": 0.1481, "step": 2949 }, { "epoch": 0.81, "grad_norm": 2.394276666398064, "learning_rate": 8.581856757530902e-06, "loss": 0.1835, "step": 2950 }, { "epoch": 0.81, "grad_norm": 2.45337753520434, "learning_rate": 8.580828386599955e-06, "loss": 0.2006, "step": 2951 }, { "epoch": 0.81, "grad_norm": 2.6284522610521486, "learning_rate": 8.579799704604597e-06, "loss": 0.1906, "step": 2952 }, { "epoch": 0.81, "grad_norm": 2.0366292427105246, "learning_rate": 8.57877071163419e-06, "loss": 0.1434, "step": 2953 }, { "epoch": 0.81, "grad_norm": 2.249679144464604, "learning_rate": 8.57774140777812e-06, "loss": 0.1718, "step": 2954 }, { "epoch": 0.81, "grad_norm": 2.652569711235235, "learning_rate": 8.576711793125804e-06, "loss": 0.1909, "step": 2955 }, { "epoch": 0.81, "grad_norm": 2.725252432798393, "learning_rate": 8.575681867766685e-06, "loss": 0.2155, "step": 2956 }, { "epoch": 0.81, "grad_norm": 1.890890149773693, "learning_rate": 8.574651631790229e-06, "loss": 0.1701, "step": 2957 }, { "epoch": 0.81, "grad_norm": 2.296949025815334, "learning_rate": 8.573621085285934e-06, "loss": 0.2085, "step": 2958 }, { "epoch": 0.81, "grad_norm": 2.029623932412452, "learning_rate": 8.572590228343322e-06, "loss": 0.1562, "step": 2959 }, { "epoch": 0.81, "grad_norm": 2.521867740967801, "learning_rate": 8.571559061051943e-06, "loss": 0.2039, "step": 2960 }, { "epoch": 0.81, "grad_norm": 2.3599661575880275, "learning_rate": 8.570527583501374e-06, "loss": 0.2269, "step": 2961 }, { "epoch": 0.81, "grad_norm": 2.3909147968253603, "learning_rate": 8.569495795781221e-06, "loss": 0.2201, "step": 2962 }, { "epoch": 0.81, "grad_norm": 2.4161298466611467, "learning_rate": 8.568463697981112e-06, "loss": 0.2188, "step": 2963 }, { "epoch": 0.81, "grad_norm": 2.240662591106111, "learning_rate": 8.567431290190705e-06, "loss": 0.1746, "step": 2964 }, { "epoch": 0.81, "grad_norm": 2.271007932073467, "learning_rate": 8.566398572499685e-06, "loss": 0.2024, "step": 2965 }, { "epoch": 0.81, "grad_norm": 2.3613354212421203, "learning_rate": 8.565365544997763e-06, "loss": 0.1807, "step": 2966 }, { "epoch": 0.81, "grad_norm": 2.498127016720821, "learning_rate": 8.56433220777468e-06, "loss": 0.217, "step": 2967 }, { "epoch": 0.81, "grad_norm": 2.2452359753478834, "learning_rate": 8.563298560920198e-06, "loss": 0.184, "step": 2968 }, { "epoch": 0.81, "grad_norm": 2.1818094509200914, "learning_rate": 8.562264604524112e-06, "loss": 0.1907, "step": 2969 }, { "epoch": 0.81, "grad_norm": 2.3948536569139094, "learning_rate": 8.56123033867624e-06, "loss": 0.2004, "step": 2970 }, { "epoch": 0.81, "grad_norm": 2.6730079276605223, "learning_rate": 8.560195763466428e-06, "loss": 0.1843, "step": 2971 }, { "epoch": 0.81, "grad_norm": 2.341976726801267, "learning_rate": 8.559160878984548e-06, "loss": 0.1884, "step": 2972 }, { "epoch": 0.81, "grad_norm": 2.1177567859730697, "learning_rate": 8.558125685320502e-06, "loss": 0.169, "step": 2973 }, { "epoch": 0.81, "grad_norm": 2.0266131283623263, "learning_rate": 8.557090182564215e-06, "loss": 0.1782, "step": 2974 }, { "epoch": 0.81, "grad_norm": 2.0774743600694725, "learning_rate": 8.556054370805642e-06, "loss": 0.167, "step": 2975 }, { "epoch": 0.81, "grad_norm": 1.9627260224740766, "learning_rate": 8.555018250134761e-06, "loss": 0.1465, "step": 2976 }, { "epoch": 0.81, "grad_norm": 2.2128573763105877, "learning_rate": 8.553981820641582e-06, "loss": 0.1614, "step": 2977 }, { "epoch": 0.81, "grad_norm": 2.44005337598289, "learning_rate": 8.552945082416135e-06, "loss": 0.1942, "step": 2978 }, { "epoch": 0.81, "grad_norm": 2.5369090014104283, "learning_rate": 8.551908035548486e-06, "loss": 0.1994, "step": 2979 }, { "epoch": 0.81, "grad_norm": 2.249862410876498, "learning_rate": 8.550870680128718e-06, "loss": 0.1732, "step": 2980 }, { "epoch": 0.81, "grad_norm": 2.4587537349780977, "learning_rate": 8.549833016246948e-06, "loss": 0.2018, "step": 2981 }, { "epoch": 0.81, "grad_norm": 2.1910853260507452, "learning_rate": 8.548795043993316e-06, "loss": 0.174, "step": 2982 }, { "epoch": 0.81, "grad_norm": 2.0283828584050654, "learning_rate": 8.547756763457993e-06, "loss": 0.1824, "step": 2983 }, { "epoch": 0.81, "grad_norm": 2.1443991911136666, "learning_rate": 8.54671817473117e-06, "loss": 0.197, "step": 2984 }, { "epoch": 0.81, "grad_norm": 2.466367033374896, "learning_rate": 8.54567927790307e-06, "loss": 0.2043, "step": 2985 }, { "epoch": 0.82, "grad_norm": 2.3139226927566536, "learning_rate": 8.544640073063941e-06, "loss": 0.2045, "step": 2986 }, { "epoch": 0.82, "grad_norm": 2.0997027130755943, "learning_rate": 8.543600560304059e-06, "loss": 0.1866, "step": 2987 }, { "epoch": 0.82, "grad_norm": 2.1389997681869377, "learning_rate": 8.542560739713726e-06, "loss": 0.167, "step": 2988 }, { "epoch": 0.82, "grad_norm": 2.2853143360034194, "learning_rate": 8.54152061138327e-06, "loss": 0.1751, "step": 2989 }, { "epoch": 0.82, "grad_norm": 2.1682988469979314, "learning_rate": 8.540480175403045e-06, "loss": 0.1736, "step": 2990 }, { "epoch": 0.82, "grad_norm": 2.4002264765603365, "learning_rate": 8.539439431863434e-06, "loss": 0.1904, "step": 2991 }, { "epoch": 0.82, "grad_norm": 2.427987034433787, "learning_rate": 8.538398380854848e-06, "loss": 0.1823, "step": 2992 }, { "epoch": 0.82, "grad_norm": 2.1709130880004173, "learning_rate": 8.53735702246772e-06, "loss": 0.1487, "step": 2993 }, { "epoch": 0.82, "grad_norm": 2.1147905296149347, "learning_rate": 8.536315356792513e-06, "loss": 0.1483, "step": 2994 }, { "epoch": 0.82, "grad_norm": 2.512373886935549, "learning_rate": 8.535273383919715e-06, "loss": 0.2015, "step": 2995 }, { "epoch": 0.82, "grad_norm": 2.8196023608468455, "learning_rate": 8.534231103939842e-06, "loss": 0.2452, "step": 2996 }, { "epoch": 0.82, "grad_norm": 2.5105635412716727, "learning_rate": 8.533188516943436e-06, "loss": 0.212, "step": 2997 }, { "epoch": 0.82, "grad_norm": 2.27547473821853, "learning_rate": 8.532145623021067e-06, "loss": 0.1748, "step": 2998 }, { "epoch": 0.82, "grad_norm": 2.24749024744668, "learning_rate": 8.53110242226333e-06, "loss": 0.205, "step": 2999 }, { "epoch": 0.82, "grad_norm": 2.296213499562884, "learning_rate": 8.530058914760846e-06, "loss": 0.1995, "step": 3000 }, { "epoch": 0.82, "grad_norm": 2.191079970170801, "learning_rate": 8.529015100604267e-06, "loss": 0.1424, "step": 3001 }, { "epoch": 0.82, "grad_norm": 2.3979563801602555, "learning_rate": 8.527970979884266e-06, "loss": 0.2067, "step": 3002 }, { "epoch": 0.82, "grad_norm": 2.1809938735388568, "learning_rate": 8.526926552691545e-06, "loss": 0.184, "step": 3003 }, { "epoch": 0.82, "grad_norm": 2.306277374137795, "learning_rate": 8.525881819116832e-06, "loss": 0.1918, "step": 3004 }, { "epoch": 0.82, "grad_norm": 2.34026456006689, "learning_rate": 8.524836779250886e-06, "loss": 0.1494, "step": 3005 }, { "epoch": 0.82, "grad_norm": 2.2721382838304565, "learning_rate": 8.523791433184486e-06, "loss": 0.2074, "step": 3006 }, { "epoch": 0.82, "grad_norm": 2.222874464450221, "learning_rate": 8.522745781008442e-06, "loss": 0.2302, "step": 3007 }, { "epoch": 0.82, "grad_norm": 2.325379369042873, "learning_rate": 8.521699822813587e-06, "loss": 0.1908, "step": 3008 }, { "epoch": 0.82, "grad_norm": 2.4122651401773894, "learning_rate": 8.520653558690785e-06, "loss": 0.2084, "step": 3009 }, { "epoch": 0.82, "grad_norm": 2.1546434756554875, "learning_rate": 8.519606988730924e-06, "loss": 0.1842, "step": 3010 }, { "epoch": 0.82, "grad_norm": 6.865708444249605, "learning_rate": 8.518560113024918e-06, "loss": 0.169, "step": 3011 }, { "epoch": 0.82, "grad_norm": 2.2554067090977776, "learning_rate": 8.51751293166371e-06, "loss": 0.1703, "step": 3012 }, { "epoch": 0.82, "grad_norm": 2.357181088308605, "learning_rate": 8.516465444738264e-06, "loss": 0.1735, "step": 3013 }, { "epoch": 0.82, "grad_norm": 2.425931728761658, "learning_rate": 8.51541765233958e-06, "loss": 0.1971, "step": 3014 }, { "epoch": 0.82, "grad_norm": 2.365361162220478, "learning_rate": 8.514369554558677e-06, "loss": 0.2018, "step": 3015 }, { "epoch": 0.82, "grad_norm": 2.0394891389537, "learning_rate": 8.513321151486602e-06, "loss": 0.1462, "step": 3016 }, { "epoch": 0.82, "grad_norm": 2.6773440927342125, "learning_rate": 8.512272443214428e-06, "loss": 0.1901, "step": 3017 }, { "epoch": 0.82, "grad_norm": 2.4491676839132874, "learning_rate": 8.511223429833258e-06, "loss": 0.1913, "step": 3018 }, { "epoch": 0.82, "grad_norm": 2.0677356406947327, "learning_rate": 8.510174111434219e-06, "loss": 0.1713, "step": 3019 }, { "epoch": 0.82, "grad_norm": 2.3485120970242437, "learning_rate": 8.509124488108462e-06, "loss": 0.1811, "step": 3020 }, { "epoch": 0.82, "grad_norm": 2.46341352234346, "learning_rate": 8.508074559947172e-06, "loss": 0.1709, "step": 3021 }, { "epoch": 0.83, "grad_norm": 8.685440054496452, "learning_rate": 8.507024327041551e-06, "loss": 0.1824, "step": 3022 }, { "epoch": 0.83, "grad_norm": 2.4808934965049363, "learning_rate": 8.505973789482833e-06, "loss": 0.2093, "step": 3023 }, { "epoch": 0.83, "grad_norm": 1.8556894455660642, "learning_rate": 8.50492294736228e-06, "loss": 0.1524, "step": 3024 }, { "epoch": 0.83, "grad_norm": 2.4881506016204358, "learning_rate": 8.503871800771175e-06, "loss": 0.2049, "step": 3025 }, { "epoch": 0.83, "grad_norm": 2.240489646190458, "learning_rate": 8.502820349800832e-06, "loss": 0.196, "step": 3026 }, { "epoch": 0.83, "grad_norm": 2.691420655024295, "learning_rate": 8.50176859454259e-06, "loss": 0.2091, "step": 3027 }, { "epoch": 0.83, "grad_norm": 3.053217633237131, "learning_rate": 8.500716535087815e-06, "loss": 0.1731, "step": 3028 }, { "epoch": 0.83, "grad_norm": 2.2403328224929324, "learning_rate": 8.499664171527895e-06, "loss": 0.1428, "step": 3029 }, { "epoch": 0.83, "grad_norm": 2.4978588685201775, "learning_rate": 8.498611503954253e-06, "loss": 0.2214, "step": 3030 }, { "epoch": 0.83, "grad_norm": 2.4065018938809706, "learning_rate": 8.497558532458333e-06, "loss": 0.2216, "step": 3031 }, { "epoch": 0.83, "grad_norm": 2.5129761814306266, "learning_rate": 8.496505257131602e-06, "loss": 0.2129, "step": 3032 }, { "epoch": 0.83, "grad_norm": 2.363418496015452, "learning_rate": 8.495451678065563e-06, "loss": 0.1981, "step": 3033 }, { "epoch": 0.83, "grad_norm": 2.3725663050997725, "learning_rate": 8.494397795351735e-06, "loss": 0.1729, "step": 3034 }, { "epoch": 0.83, "grad_norm": 2.6043223027279883, "learning_rate": 8.49334360908167e-06, "loss": 0.2415, "step": 3035 }, { "epoch": 0.83, "grad_norm": 3.200892946502124, "learning_rate": 8.492289119346944e-06, "loss": 0.2071, "step": 3036 }, { "epoch": 0.83, "grad_norm": 2.2410103218519954, "learning_rate": 8.491234326239162e-06, "loss": 0.1557, "step": 3037 }, { "epoch": 0.83, "grad_norm": 2.4070035528104743, "learning_rate": 8.49017922984995e-06, "loss": 0.207, "step": 3038 }, { "epoch": 0.83, "grad_norm": 2.237562903347781, "learning_rate": 8.489123830270966e-06, "loss": 0.1919, "step": 3039 }, { "epoch": 0.83, "grad_norm": 2.3614936089828213, "learning_rate": 8.488068127593892e-06, "loss": 0.2001, "step": 3040 }, { "epoch": 0.83, "grad_norm": 2.175831430982278, "learning_rate": 8.487012121910435e-06, "loss": 0.1487, "step": 3041 }, { "epoch": 0.83, "grad_norm": 2.1806127137435847, "learning_rate": 8.485955813312328e-06, "loss": 0.1924, "step": 3042 }, { "epoch": 0.83, "grad_norm": 2.2384261705794195, "learning_rate": 8.484899201891336e-06, "loss": 0.1762, "step": 3043 }, { "epoch": 0.83, "grad_norm": 2.343339274052141, "learning_rate": 8.483842287739244e-06, "loss": 0.1853, "step": 3044 }, { "epoch": 0.83, "grad_norm": 2.910824238326714, "learning_rate": 8.482785070947866e-06, "loss": 0.2042, "step": 3045 }, { "epoch": 0.83, "grad_norm": 2.7607282515390614, "learning_rate": 8.48172755160904e-06, "loss": 0.2072, "step": 3046 }, { "epoch": 0.83, "grad_norm": 2.146948793894683, "learning_rate": 8.480669729814635e-06, "loss": 0.1675, "step": 3047 }, { "epoch": 0.83, "grad_norm": 2.218717007846125, "learning_rate": 8.479611605656541e-06, "loss": 0.1702, "step": 3048 }, { "epoch": 0.83, "grad_norm": 2.325550412706671, "learning_rate": 8.478553179226676e-06, "loss": 0.1925, "step": 3049 }, { "epoch": 0.83, "grad_norm": 2.242273133710287, "learning_rate": 8.477494450616988e-06, "loss": 0.1816, "step": 3050 }, { "epoch": 0.83, "grad_norm": 2.94647079451854, "learning_rate": 8.476435419919446e-06, "loss": 0.2217, "step": 3051 }, { "epoch": 0.83, "grad_norm": 2.1939567209115025, "learning_rate": 8.475376087226048e-06, "loss": 0.1751, "step": 3052 }, { "epoch": 0.83, "grad_norm": 2.2597485431400823, "learning_rate": 8.474316452628816e-06, "loss": 0.2272, "step": 3053 }, { "epoch": 0.83, "grad_norm": 2.382621343745723, "learning_rate": 8.473256516219803e-06, "loss": 0.1747, "step": 3054 }, { "epoch": 0.83, "grad_norm": 2.4858681339125206, "learning_rate": 8.472196278091083e-06, "loss": 0.1875, "step": 3055 }, { "epoch": 0.83, "grad_norm": 2.211265026347421, "learning_rate": 8.471135738334758e-06, "loss": 0.1826, "step": 3056 }, { "epoch": 0.83, "grad_norm": 2.5829061069828247, "learning_rate": 8.470074897042958e-06, "loss": 0.1901, "step": 3057 }, { "epoch": 0.83, "grad_norm": 2.4037883591247065, "learning_rate": 8.469013754307834e-06, "loss": 0.2005, "step": 3058 }, { "epoch": 0.84, "grad_norm": 2.1486518031212425, "learning_rate": 8.46795231022157e-06, "loss": 0.1593, "step": 3059 }, { "epoch": 0.84, "grad_norm": 2.3014271772524304, "learning_rate": 8.466890564876374e-06, "loss": 0.1904, "step": 3060 }, { "epoch": 0.84, "grad_norm": 2.3152604521832143, "learning_rate": 8.465828518364476e-06, "loss": 0.19, "step": 3061 }, { "epoch": 0.84, "grad_norm": 2.496576917370413, "learning_rate": 8.464766170778138e-06, "loss": 0.2097, "step": 3062 }, { "epoch": 0.84, "grad_norm": 2.0387693913764022, "learning_rate": 8.463703522209644e-06, "loss": 0.1546, "step": 3063 }, { "epoch": 0.84, "grad_norm": 2.279854354405747, "learning_rate": 8.462640572751306e-06, "loss": 0.1831, "step": 3064 }, { "epoch": 0.84, "grad_norm": 2.3758804873078194, "learning_rate": 8.461577322495463e-06, "loss": 0.1929, "step": 3065 }, { "epoch": 0.84, "grad_norm": 2.385642994484938, "learning_rate": 8.460513771534475e-06, "loss": 0.2021, "step": 3066 }, { "epoch": 0.84, "grad_norm": 2.700259176909793, "learning_rate": 8.459449919960737e-06, "loss": 0.2065, "step": 3067 }, { "epoch": 0.84, "grad_norm": 2.631203541207082, "learning_rate": 8.458385767866662e-06, "loss": 0.1705, "step": 3068 }, { "epoch": 0.84, "grad_norm": 2.447619822150682, "learning_rate": 8.457321315344695e-06, "loss": 0.1886, "step": 3069 }, { "epoch": 0.84, "grad_norm": 2.4516625794127993, "learning_rate": 8.456256562487301e-06, "loss": 0.1905, "step": 3070 }, { "epoch": 0.84, "grad_norm": 2.299592883000959, "learning_rate": 8.455191509386975e-06, "loss": 0.182, "step": 3071 }, { "epoch": 0.84, "grad_norm": 2.2269770745840445, "learning_rate": 8.45412615613624e-06, "loss": 0.2016, "step": 3072 }, { "epoch": 0.84, "grad_norm": 2.085420537972154, "learning_rate": 8.45306050282764e-06, "loss": 0.1708, "step": 3073 }, { "epoch": 0.84, "grad_norm": 2.3862694234581605, "learning_rate": 8.45199454955375e-06, "loss": 0.1878, "step": 3074 }, { "epoch": 0.84, "grad_norm": 2.353820514730244, "learning_rate": 8.450928296407168e-06, "loss": 0.1744, "step": 3075 }, { "epoch": 0.84, "grad_norm": 2.3247469904138276, "learning_rate": 8.449861743480517e-06, "loss": 0.1847, "step": 3076 }, { "epoch": 0.84, "grad_norm": 2.074744225118243, "learning_rate": 8.44879489086645e-06, "loss": 0.1635, "step": 3077 }, { "epoch": 0.84, "grad_norm": 1.8597912145251223, "learning_rate": 8.44772773865764e-06, "loss": 0.1338, "step": 3078 }, { "epoch": 0.84, "grad_norm": 2.125846950269848, "learning_rate": 8.446660286946796e-06, "loss": 0.1703, "step": 3079 }, { "epoch": 0.84, "grad_norm": 2.0872555692773047, "learning_rate": 8.445592535826643e-06, "loss": 0.1761, "step": 3080 }, { "epoch": 0.84, "grad_norm": 2.1181738011558635, "learning_rate": 8.444524485389936e-06, "loss": 0.1456, "step": 3081 }, { "epoch": 0.84, "grad_norm": 2.4989665979744675, "learning_rate": 8.443456135729458e-06, "loss": 0.188, "step": 3082 }, { "epoch": 0.84, "grad_norm": 2.5422381304376507, "learning_rate": 8.442387486938013e-06, "loss": 0.1858, "step": 3083 }, { "epoch": 0.84, "grad_norm": 2.590302299506852, "learning_rate": 8.441318539108433e-06, "loss": 0.2033, "step": 3084 }, { "epoch": 0.84, "grad_norm": 2.3115779754728045, "learning_rate": 8.440249292333583e-06, "loss": 0.1878, "step": 3085 }, { "epoch": 0.84, "grad_norm": 2.3282731352024673, "learning_rate": 8.439179746706343e-06, "loss": 0.1708, "step": 3086 }, { "epoch": 0.84, "grad_norm": 2.75669156663873, "learning_rate": 8.438109902319622e-06, "loss": 0.2234, "step": 3087 }, { "epoch": 0.84, "grad_norm": 2.1802476879487545, "learning_rate": 8.437039759266364e-06, "loss": 0.1879, "step": 3088 }, { "epoch": 0.84, "grad_norm": 2.412437474357647, "learning_rate": 8.435969317639522e-06, "loss": 0.1834, "step": 3089 }, { "epoch": 0.84, "grad_norm": 2.2251027093776927, "learning_rate": 8.434898577532094e-06, "loss": 0.1928, "step": 3090 }, { "epoch": 0.84, "grad_norm": 2.5865316177069135, "learning_rate": 8.433827539037088e-06, "loss": 0.1917, "step": 3091 }, { "epoch": 0.84, "grad_norm": 2.578706726599439, "learning_rate": 8.432756202247547e-06, "loss": 0.2108, "step": 3092 }, { "epoch": 0.84, "grad_norm": 2.8375642700619648, "learning_rate": 8.431684567256537e-06, "loss": 0.1851, "step": 3093 }, { "epoch": 0.84, "grad_norm": 2.259006862859739, "learning_rate": 8.430612634157152e-06, "loss": 0.187, "step": 3094 }, { "epoch": 0.84, "grad_norm": 2.2474961826152158, "learning_rate": 8.429540403042507e-06, "loss": 0.2066, "step": 3095 }, { "epoch": 0.85, "grad_norm": 2.738646810269791, "learning_rate": 8.42846787400575e-06, "loss": 0.1719, "step": 3096 }, { "epoch": 0.85, "grad_norm": 1.9865551837167141, "learning_rate": 8.427395047140046e-06, "loss": 0.1636, "step": 3097 }, { "epoch": 0.85, "grad_norm": 2.9476311838629043, "learning_rate": 8.426321922538594e-06, "loss": 0.213, "step": 3098 }, { "epoch": 0.85, "grad_norm": 2.316636072549775, "learning_rate": 8.425248500294616e-06, "loss": 0.1726, "step": 3099 }, { "epoch": 0.85, "grad_norm": 2.0406642084271187, "learning_rate": 8.424174780501359e-06, "loss": 0.1494, "step": 3100 }, { "epoch": 0.85, "grad_norm": 2.3455637830238585, "learning_rate": 8.423100763252094e-06, "loss": 0.1637, "step": 3101 }, { "epoch": 0.85, "grad_norm": 2.499855280905456, "learning_rate": 8.422026448640124e-06, "loss": 0.2014, "step": 3102 }, { "epoch": 0.85, "grad_norm": 2.331657645185503, "learning_rate": 8.420951836758774e-06, "loss": 0.1723, "step": 3103 }, { "epoch": 0.85, "grad_norm": 2.3615940104180857, "learning_rate": 8.41987692770139e-06, "loss": 0.1931, "step": 3104 }, { "epoch": 0.85, "grad_norm": 2.2436713661654646, "learning_rate": 8.418801721561355e-06, "loss": 0.1838, "step": 3105 }, { "epoch": 0.85, "grad_norm": 2.427231359923422, "learning_rate": 8.417726218432065e-06, "loss": 0.1825, "step": 3106 }, { "epoch": 0.85, "grad_norm": 2.3052245007264798, "learning_rate": 8.416650418406956e-06, "loss": 0.1901, "step": 3107 }, { "epoch": 0.85, "grad_norm": 2.2037188012982165, "learning_rate": 8.415574321579474e-06, "loss": 0.1778, "step": 3108 }, { "epoch": 0.85, "grad_norm": 2.2248530678225924, "learning_rate": 8.414497928043104e-06, "loss": 0.191, "step": 3109 }, { "epoch": 0.85, "grad_norm": 3.489295525766973, "learning_rate": 8.413421237891352e-06, "loss": 0.2177, "step": 3110 }, { "epoch": 0.85, "grad_norm": 2.1842004842443665, "learning_rate": 8.412344251217746e-06, "loss": 0.1683, "step": 3111 }, { "epoch": 0.85, "grad_norm": 2.270047635910235, "learning_rate": 8.411266968115847e-06, "loss": 0.2077, "step": 3112 }, { "epoch": 0.85, "grad_norm": 2.1802499926960626, "learning_rate": 8.410189388679234e-06, "loss": 0.1844, "step": 3113 }, { "epoch": 0.85, "grad_norm": 2.623144868565555, "learning_rate": 8.409111513001519e-06, "loss": 0.2277, "step": 3114 }, { "epoch": 0.85, "grad_norm": 2.3706240208993665, "learning_rate": 8.408033341176333e-06, "loss": 0.2081, "step": 3115 }, { "epoch": 0.85, "grad_norm": 2.239003050831606, "learning_rate": 8.406954873297342e-06, "loss": 0.1768, "step": 3116 }, { "epoch": 0.85, "grad_norm": 2.093780183056687, "learning_rate": 8.405876109458225e-06, "loss": 0.1933, "step": 3117 }, { "epoch": 0.85, "grad_norm": 2.4175316107952383, "learning_rate": 8.404797049752697e-06, "loss": 0.2196, "step": 3118 }, { "epoch": 0.85, "grad_norm": 2.403451209470031, "learning_rate": 8.403717694274498e-06, "loss": 0.1985, "step": 3119 }, { "epoch": 0.85, "grad_norm": 2.165214059904794, "learning_rate": 8.402638043117384e-06, "loss": 0.1801, "step": 3120 }, { "epoch": 0.85, "grad_norm": 2.4554409846973364, "learning_rate": 8.401558096375149e-06, "loss": 0.2003, "step": 3121 }, { "epoch": 0.85, "grad_norm": 2.4169821631854447, "learning_rate": 8.400477854141606e-06, "loss": 0.202, "step": 3122 }, { "epoch": 0.85, "grad_norm": 2.60438028892301, "learning_rate": 8.399397316510596e-06, "loss": 0.1805, "step": 3123 }, { "epoch": 0.85, "grad_norm": 2.617154542372659, "learning_rate": 8.398316483575981e-06, "loss": 0.1991, "step": 3124 }, { "epoch": 0.85, "grad_norm": 2.189831265922532, "learning_rate": 8.397235355431656e-06, "loss": 0.1776, "step": 3125 }, { "epoch": 0.85, "grad_norm": 2.533707995638101, "learning_rate": 8.396153932171538e-06, "loss": 0.2125, "step": 3126 }, { "epoch": 0.85, "grad_norm": 2.2225113045189127, "learning_rate": 8.395072213889567e-06, "loss": 0.1739, "step": 3127 }, { "epoch": 0.85, "grad_norm": 2.2039562971032574, "learning_rate": 8.393990200679714e-06, "loss": 0.1546, "step": 3128 }, { "epoch": 0.85, "grad_norm": 2.5193649829042535, "learning_rate": 8.39290789263597e-06, "loss": 0.197, "step": 3129 }, { "epoch": 0.85, "grad_norm": 2.290784657932828, "learning_rate": 8.391825289852355e-06, "loss": 0.2148, "step": 3130 }, { "epoch": 0.85, "grad_norm": 2.118159324819893, "learning_rate": 8.390742392422916e-06, "loss": 0.1718, "step": 3131 }, { "epoch": 0.86, "grad_norm": 2.1319792493852017, "learning_rate": 8.389659200441722e-06, "loss": 0.1792, "step": 3132 }, { "epoch": 0.86, "grad_norm": 2.222421329953683, "learning_rate": 8.388575714002872e-06, "loss": 0.1979, "step": 3133 }, { "epoch": 0.86, "grad_norm": 2.3475733503971337, "learning_rate": 8.387491933200483e-06, "loss": 0.154, "step": 3134 }, { "epoch": 0.86, "grad_norm": 2.3936045453675576, "learning_rate": 8.386407858128707e-06, "loss": 0.1923, "step": 3135 }, { "epoch": 0.86, "grad_norm": 2.287446122286075, "learning_rate": 8.385323488881714e-06, "loss": 0.1614, "step": 3136 }, { "epoch": 0.86, "grad_norm": 2.621889330018262, "learning_rate": 8.384238825553704e-06, "loss": 0.207, "step": 3137 }, { "epoch": 0.86, "grad_norm": 2.238718143378392, "learning_rate": 8.383153868238898e-06, "loss": 0.1701, "step": 3138 }, { "epoch": 0.86, "grad_norm": 2.1291662501656803, "learning_rate": 8.382068617031552e-06, "loss": 0.1736, "step": 3139 }, { "epoch": 0.86, "grad_norm": 2.3343925441577413, "learning_rate": 8.380983072025934e-06, "loss": 0.1717, "step": 3140 }, { "epoch": 0.86, "grad_norm": 2.2028058505377515, "learning_rate": 8.37989723331635e-06, "loss": 0.1625, "step": 3141 }, { "epoch": 0.86, "grad_norm": 2.342067928948073, "learning_rate": 8.378811100997122e-06, "loss": 0.2015, "step": 3142 }, { "epoch": 0.86, "grad_norm": 2.408933931685575, "learning_rate": 8.377724675162607e-06, "loss": 0.1793, "step": 3143 }, { "epoch": 0.86, "grad_norm": 2.572768283139493, "learning_rate": 8.376637955907176e-06, "loss": 0.2256, "step": 3144 }, { "epoch": 0.86, "grad_norm": 2.1310873301182287, "learning_rate": 8.375550943325235e-06, "loss": 0.1638, "step": 3145 }, { "epoch": 0.86, "grad_norm": 2.463516843479315, "learning_rate": 8.374463637511212e-06, "loss": 0.2336, "step": 3146 }, { "epoch": 0.86, "grad_norm": 1.969188980344144, "learning_rate": 8.37337603855956e-06, "loss": 0.152, "step": 3147 }, { "epoch": 0.86, "grad_norm": 2.496477007147593, "learning_rate": 8.372288146564757e-06, "loss": 0.2056, "step": 3148 }, { "epoch": 0.86, "grad_norm": 2.135154155548586, "learning_rate": 8.371199961621312e-06, "loss": 0.1861, "step": 3149 }, { "epoch": 0.86, "grad_norm": 2.3125600317055137, "learning_rate": 8.370111483823749e-06, "loss": 0.207, "step": 3150 }, { "epoch": 0.86, "grad_norm": 2.483728856043701, "learning_rate": 8.369022713266629e-06, "loss": 0.1885, "step": 3151 }, { "epoch": 0.86, "grad_norm": 2.185622752091959, "learning_rate": 8.367933650044526e-06, "loss": 0.1773, "step": 3152 }, { "epoch": 0.86, "grad_norm": 2.4317937062140706, "learning_rate": 8.366844294252054e-06, "loss": 0.1909, "step": 3153 }, { "epoch": 0.86, "grad_norm": 2.2057446814003066, "learning_rate": 8.365754645983839e-06, "loss": 0.1998, "step": 3154 }, { "epoch": 0.86, "grad_norm": 2.2408568551905246, "learning_rate": 8.36466470533454e-06, "loss": 0.183, "step": 3155 }, { "epoch": 0.86, "grad_norm": 2.187634157667538, "learning_rate": 8.363574472398841e-06, "loss": 0.1812, "step": 3156 }, { "epoch": 0.86, "grad_norm": 2.428166441488641, "learning_rate": 8.362483947271446e-06, "loss": 0.1812, "step": 3157 }, { "epoch": 0.86, "grad_norm": 2.303233504588605, "learning_rate": 8.361393130047093e-06, "loss": 0.1894, "step": 3158 }, { "epoch": 0.86, "grad_norm": 2.1228275158824457, "learning_rate": 8.360302020820538e-06, "loss": 0.1653, "step": 3159 }, { "epoch": 0.86, "grad_norm": 2.6528927843667383, "learning_rate": 8.359210619686565e-06, "loss": 0.2106, "step": 3160 }, { "epoch": 0.86, "grad_norm": 2.059399745173855, "learning_rate": 8.358118926739984e-06, "loss": 0.1607, "step": 3161 }, { "epoch": 0.86, "grad_norm": 2.1633688696032256, "learning_rate": 8.35702694207563e-06, "loss": 0.1848, "step": 3162 }, { "epoch": 0.86, "grad_norm": 1.9429359947337626, "learning_rate": 8.355934665788361e-06, "loss": 0.1565, "step": 3163 }, { "epoch": 0.86, "grad_norm": 2.124194516661543, "learning_rate": 8.354842097973065e-06, "loss": 0.1796, "step": 3164 }, { "epoch": 0.86, "grad_norm": 2.5037622521669856, "learning_rate": 8.35374923872465e-06, "loss": 0.1994, "step": 3165 }, { "epoch": 0.86, "grad_norm": 2.271565297502334, "learning_rate": 8.352656088138056e-06, "loss": 0.1799, "step": 3166 }, { "epoch": 0.86, "grad_norm": 1.997459133750597, "learning_rate": 8.35156264630824e-06, "loss": 0.1507, "step": 3167 }, { "epoch": 0.86, "grad_norm": 2.3259655677813296, "learning_rate": 8.350468913330192e-06, "loss": 0.191, "step": 3168 }, { "epoch": 0.87, "grad_norm": 2.3097531818512205, "learning_rate": 8.349374889298923e-06, "loss": 0.2023, "step": 3169 }, { "epoch": 0.87, "grad_norm": 2.627053274025339, "learning_rate": 8.348280574309468e-06, "loss": 0.181, "step": 3170 }, { "epoch": 0.87, "grad_norm": 2.495673766202746, "learning_rate": 8.347185968456891e-06, "loss": 0.2025, "step": 3171 }, { "epoch": 0.87, "grad_norm": 2.278958573588023, "learning_rate": 8.346091071836281e-06, "loss": 0.1756, "step": 3172 }, { "epoch": 0.87, "grad_norm": 2.0677608065011257, "learning_rate": 8.34499588454275e-06, "loss": 0.1665, "step": 3173 }, { "epoch": 0.87, "grad_norm": 2.1306891930787017, "learning_rate": 8.343900406671434e-06, "loss": 0.151, "step": 3174 }, { "epoch": 0.87, "grad_norm": 2.178018017246907, "learning_rate": 8.342804638317502e-06, "loss": 0.1824, "step": 3175 }, { "epoch": 0.87, "grad_norm": 2.3232082669398086, "learning_rate": 8.341708579576138e-06, "loss": 0.2091, "step": 3176 }, { "epoch": 0.87, "grad_norm": 2.1334689352633336, "learning_rate": 8.340612230542557e-06, "loss": 0.1791, "step": 3177 }, { "epoch": 0.87, "grad_norm": 2.2534095598490946, "learning_rate": 8.339515591312e-06, "loss": 0.1651, "step": 3178 }, { "epoch": 0.87, "grad_norm": 2.265424416753896, "learning_rate": 8.338418661979729e-06, "loss": 0.1869, "step": 3179 }, { "epoch": 0.87, "grad_norm": 2.374421167726862, "learning_rate": 8.337321442641036e-06, "loss": 0.1945, "step": 3180 }, { "epoch": 0.87, "grad_norm": 2.3292860052044833, "learning_rate": 8.336223933391232e-06, "loss": 0.164, "step": 3181 }, { "epoch": 0.87, "grad_norm": 2.257221654444349, "learning_rate": 8.33512613432566e-06, "loss": 0.1412, "step": 3182 }, { "epoch": 0.87, "grad_norm": 2.1604269318398046, "learning_rate": 8.334028045539685e-06, "loss": 0.161, "step": 3183 }, { "epoch": 0.87, "grad_norm": 2.6048587704344675, "learning_rate": 8.332929667128698e-06, "loss": 0.2226, "step": 3184 }, { "epoch": 0.87, "grad_norm": 2.09938770425246, "learning_rate": 8.33183099918811e-06, "loss": 0.1715, "step": 3185 }, { "epoch": 0.87, "grad_norm": 2.1683859617254693, "learning_rate": 8.330732041813367e-06, "loss": 0.1821, "step": 3186 }, { "epoch": 0.87, "grad_norm": 2.4259187194768157, "learning_rate": 8.329632795099934e-06, "loss": 0.2045, "step": 3187 }, { "epoch": 0.87, "grad_norm": 2.218391832587804, "learning_rate": 8.328533259143298e-06, "loss": 0.1693, "step": 3188 }, { "epoch": 0.87, "grad_norm": 2.2756406388422636, "learning_rate": 8.327433434038979e-06, "loss": 0.1927, "step": 3189 }, { "epoch": 0.87, "grad_norm": 2.456680147484425, "learning_rate": 8.326333319882516e-06, "loss": 0.2015, "step": 3190 }, { "epoch": 0.87, "grad_norm": 2.132591417501493, "learning_rate": 8.325232916769477e-06, "loss": 0.1811, "step": 3191 }, { "epoch": 0.87, "grad_norm": 2.155834474540361, "learning_rate": 8.324132224795453e-06, "loss": 0.2041, "step": 3192 }, { "epoch": 0.87, "grad_norm": 2.2093705082676047, "learning_rate": 8.323031244056058e-06, "loss": 0.1858, "step": 3193 }, { "epoch": 0.87, "grad_norm": 2.470273888322643, "learning_rate": 8.321929974646936e-06, "loss": 0.1899, "step": 3194 }, { "epoch": 0.87, "grad_norm": 1.7339549245730934, "learning_rate": 8.320828416663753e-06, "loss": 0.1525, "step": 3195 }, { "epoch": 0.87, "grad_norm": 1.9586981936767238, "learning_rate": 8.319726570202201e-06, "loss": 0.151, "step": 3196 }, { "epoch": 0.87, "grad_norm": 2.363058366062218, "learning_rate": 8.318624435357995e-06, "loss": 0.1842, "step": 3197 }, { "epoch": 0.87, "grad_norm": 2.801335386185857, "learning_rate": 8.31752201222688e-06, "loss": 0.2212, "step": 3198 }, { "epoch": 0.87, "grad_norm": 2.1806504636883726, "learning_rate": 8.316419300904622e-06, "loss": 0.1885, "step": 3199 }, { "epoch": 0.87, "grad_norm": 2.45678693859772, "learning_rate": 8.315316301487009e-06, "loss": 0.2254, "step": 3200 }, { "epoch": 0.87, "grad_norm": 2.3243264293220367, "learning_rate": 8.31421301406986e-06, "loss": 0.1747, "step": 3201 }, { "epoch": 0.87, "grad_norm": 2.4487585774361067, "learning_rate": 8.313109438749021e-06, "loss": 0.1936, "step": 3202 }, { "epoch": 0.87, "grad_norm": 2.093960751141969, "learning_rate": 8.312005575620355e-06, "loss": 0.13, "step": 3203 }, { "epoch": 0.87, "grad_norm": 2.4272617352385084, "learning_rate": 8.310901424779752e-06, "loss": 0.1932, "step": 3204 }, { "epoch": 0.87, "grad_norm": 2.201052640840292, "learning_rate": 8.309796986323135e-06, "loss": 0.1998, "step": 3205 }, { "epoch": 0.88, "grad_norm": 2.394518667058749, "learning_rate": 8.308692260346439e-06, "loss": 0.1889, "step": 3206 }, { "epoch": 0.88, "grad_norm": 1.7775850170271346, "learning_rate": 8.307587246945636e-06, "loss": 0.1477, "step": 3207 }, { "epoch": 0.88, "grad_norm": 2.134855455428797, "learning_rate": 8.306481946216716e-06, "loss": 0.1662, "step": 3208 }, { "epoch": 0.88, "grad_norm": 2.4727849088191625, "learning_rate": 8.305376358255695e-06, "loss": 0.2219, "step": 3209 }, { "epoch": 0.88, "grad_norm": 2.1528447216291586, "learning_rate": 8.304270483158617e-06, "loss": 0.1931, "step": 3210 }, { "epoch": 0.88, "grad_norm": 2.3480119030798785, "learning_rate": 8.303164321021547e-06, "loss": 0.1877, "step": 3211 }, { "epoch": 0.88, "grad_norm": 2.0817444970144234, "learning_rate": 8.302057871940577e-06, "loss": 0.1733, "step": 3212 }, { "epoch": 0.88, "grad_norm": 2.2849706467352564, "learning_rate": 8.300951136011824e-06, "loss": 0.2033, "step": 3213 }, { "epoch": 0.88, "grad_norm": 2.0795125906451535, "learning_rate": 8.299844113331428e-06, "loss": 0.1777, "step": 3214 }, { "epoch": 0.88, "grad_norm": 1.9746048251693091, "learning_rate": 8.298736803995558e-06, "loss": 0.1653, "step": 3215 }, { "epoch": 0.88, "grad_norm": 2.32948345963972, "learning_rate": 8.297629208100402e-06, "loss": 0.1799, "step": 3216 }, { "epoch": 0.88, "grad_norm": 2.1454475713257284, "learning_rate": 8.296521325742178e-06, "loss": 0.1797, "step": 3217 }, { "epoch": 0.88, "grad_norm": 2.29301092899836, "learning_rate": 8.295413157017127e-06, "loss": 0.195, "step": 3218 }, { "epoch": 0.88, "grad_norm": 2.150827013458793, "learning_rate": 8.294304702021515e-06, "loss": 0.1692, "step": 3219 }, { "epoch": 0.88, "grad_norm": 2.1776073753888765, "learning_rate": 8.293195960851634e-06, "loss": 0.1796, "step": 3220 }, { "epoch": 0.88, "grad_norm": 2.2560544368660844, "learning_rate": 8.292086933603799e-06, "loss": 0.2095, "step": 3221 }, { "epoch": 0.88, "grad_norm": 2.187506780002228, "learning_rate": 8.290977620374348e-06, "loss": 0.1972, "step": 3222 }, { "epoch": 0.88, "grad_norm": 2.357901176192247, "learning_rate": 8.28986802125965e-06, "loss": 0.1953, "step": 3223 }, { "epoch": 0.88, "grad_norm": 2.2398928673990413, "learning_rate": 8.288758136356093e-06, "loss": 0.1966, "step": 3224 }, { "epoch": 0.88, "grad_norm": 2.8708969413786636, "learning_rate": 8.287647965760092e-06, "loss": 0.2051, "step": 3225 }, { "epoch": 0.88, "grad_norm": 2.076145330091136, "learning_rate": 8.28653750956809e-06, "loss": 0.1505, "step": 3226 }, { "epoch": 0.88, "grad_norm": 2.4300211893441044, "learning_rate": 8.285426767876546e-06, "loss": 0.1803, "step": 3227 }, { "epoch": 0.88, "grad_norm": 2.533166184322002, "learning_rate": 8.284315740781953e-06, "loss": 0.2261, "step": 3228 }, { "epoch": 0.88, "grad_norm": 2.0927025673180455, "learning_rate": 8.283204428380826e-06, "loss": 0.1569, "step": 3229 }, { "epoch": 0.88, "grad_norm": 2.1302617408923785, "learning_rate": 8.282092830769703e-06, "loss": 0.1704, "step": 3230 }, { "epoch": 0.88, "grad_norm": 2.2232777757208675, "learning_rate": 8.280980948045146e-06, "loss": 0.1766, "step": 3231 }, { "epoch": 0.88, "grad_norm": 2.3813699646542235, "learning_rate": 8.279868780303745e-06, "loss": 0.192, "step": 3232 }, { "epoch": 0.88, "grad_norm": 2.2768997991403905, "learning_rate": 8.278756327642116e-06, "loss": 0.1874, "step": 3233 }, { "epoch": 0.88, "grad_norm": 2.2832862280711472, "learning_rate": 8.277643590156893e-06, "loss": 0.2008, "step": 3234 }, { "epoch": 0.88, "grad_norm": 2.064851399007841, "learning_rate": 8.276530567944742e-06, "loss": 0.172, "step": 3235 }, { "epoch": 0.88, "grad_norm": 2.410946951271658, "learning_rate": 8.27541726110235e-06, "loss": 0.1931, "step": 3236 }, { "epoch": 0.88, "grad_norm": 2.4255657343587296, "learning_rate": 8.274303669726427e-06, "loss": 0.2167, "step": 3237 }, { "epoch": 0.88, "grad_norm": 2.148485397334108, "learning_rate": 8.273189793913711e-06, "loss": 0.1732, "step": 3238 }, { "epoch": 0.88, "grad_norm": 2.3862713143435936, "learning_rate": 8.272075633760966e-06, "loss": 0.2084, "step": 3239 }, { "epoch": 0.88, "grad_norm": 2.5697572088737193, "learning_rate": 8.270961189364974e-06, "loss": 0.1892, "step": 3240 }, { "epoch": 0.88, "grad_norm": 2.223355889643228, "learning_rate": 8.26984646082255e-06, "loss": 0.1754, "step": 3241 }, { "epoch": 0.89, "grad_norm": 2.1495131546296182, "learning_rate": 8.268731448230527e-06, "loss": 0.1737, "step": 3242 }, { "epoch": 0.89, "grad_norm": 2.13242767275266, "learning_rate": 8.267616151685768e-06, "loss": 0.1817, "step": 3243 }, { "epoch": 0.89, "grad_norm": 2.2890027917155145, "learning_rate": 8.266500571285159e-06, "loss": 0.1949, "step": 3244 }, { "epoch": 0.89, "grad_norm": 2.500480168591124, "learning_rate": 8.265384707125607e-06, "loss": 0.219, "step": 3245 }, { "epoch": 0.89, "grad_norm": 2.085684452421729, "learning_rate": 8.264268559304046e-06, "loss": 0.1745, "step": 3246 }, { "epoch": 0.89, "grad_norm": 2.136115020302101, "learning_rate": 8.263152127917438e-06, "loss": 0.1718, "step": 3247 }, { "epoch": 0.89, "grad_norm": 2.063594587049693, "learning_rate": 8.262035413062763e-06, "loss": 0.1604, "step": 3248 }, { "epoch": 0.89, "grad_norm": 2.3298035804113724, "learning_rate": 8.260918414837034e-06, "loss": 0.1806, "step": 3249 }, { "epoch": 0.89, "grad_norm": 2.088763092076401, "learning_rate": 8.25980113333728e-06, "loss": 0.1824, "step": 3250 }, { "epoch": 0.89, "grad_norm": 2.338366015080485, "learning_rate": 8.258683568660561e-06, "loss": 0.1972, "step": 3251 }, { "epoch": 0.89, "grad_norm": 2.462788790077738, "learning_rate": 8.257565720903957e-06, "loss": 0.1773, "step": 3252 }, { "epoch": 0.89, "grad_norm": 2.1223564596571878, "learning_rate": 8.256447590164576e-06, "loss": 0.1964, "step": 3253 }, { "epoch": 0.89, "grad_norm": 2.391580740908043, "learning_rate": 8.255329176539552e-06, "loss": 0.1953, "step": 3254 }, { "epoch": 0.89, "grad_norm": 2.210927125449662, "learning_rate": 8.254210480126036e-06, "loss": 0.1922, "step": 3255 }, { "epoch": 0.89, "grad_norm": 2.053793442990861, "learning_rate": 8.25309150102121e-06, "loss": 0.1543, "step": 3256 }, { "epoch": 0.89, "grad_norm": 2.0110743721123066, "learning_rate": 8.251972239322283e-06, "loss": 0.159, "step": 3257 }, { "epoch": 0.89, "grad_norm": 2.064272753404483, "learning_rate": 8.250852695126478e-06, "loss": 0.16, "step": 3258 }, { "epoch": 0.89, "grad_norm": 2.36416064058127, "learning_rate": 8.249732868531056e-06, "loss": 0.1903, "step": 3259 }, { "epoch": 0.89, "grad_norm": 2.2913187130041472, "learning_rate": 8.24861275963329e-06, "loss": 0.2064, "step": 3260 }, { "epoch": 0.89, "grad_norm": 2.3229313929607476, "learning_rate": 8.247492368530485e-06, "loss": 0.186, "step": 3261 }, { "epoch": 0.89, "grad_norm": 2.2712649861955145, "learning_rate": 8.246371695319968e-06, "loss": 0.1759, "step": 3262 }, { "epoch": 0.89, "grad_norm": 2.2843930624863416, "learning_rate": 8.245250740099095e-06, "loss": 0.18, "step": 3263 }, { "epoch": 0.89, "grad_norm": 2.2399401489642865, "learning_rate": 8.244129502965239e-06, "loss": 0.1816, "step": 3264 }, { "epoch": 0.89, "grad_norm": 2.358125578004363, "learning_rate": 8.243007984015801e-06, "loss": 0.2038, "step": 3265 }, { "epoch": 0.89, "grad_norm": 2.540874782184986, "learning_rate": 8.24188618334821e-06, "loss": 0.2072, "step": 3266 }, { "epoch": 0.89, "grad_norm": 2.5915999466287727, "learning_rate": 8.240764101059913e-06, "loss": 0.2011, "step": 3267 }, { "epoch": 0.89, "grad_norm": 2.140998716900764, "learning_rate": 8.239641737248386e-06, "loss": 0.1814, "step": 3268 }, { "epoch": 0.89, "grad_norm": 2.1999473038819537, "learning_rate": 8.238519092011125e-06, "loss": 0.1835, "step": 3269 }, { "epoch": 0.89, "grad_norm": 2.0586290044788584, "learning_rate": 8.237396165445661e-06, "loss": 0.1557, "step": 3270 }, { "epoch": 0.89, "grad_norm": 2.140028167761064, "learning_rate": 8.236272957649534e-06, "loss": 0.1545, "step": 3271 }, { "epoch": 0.89, "grad_norm": 2.474330908193338, "learning_rate": 8.23514946872032e-06, "loss": 0.1623, "step": 3272 }, { "epoch": 0.89, "grad_norm": 2.435467950221756, "learning_rate": 8.234025698755616e-06, "loss": 0.1792, "step": 3273 }, { "epoch": 0.89, "grad_norm": 2.2334372698809437, "learning_rate": 8.232901647853043e-06, "loss": 0.1605, "step": 3274 }, { "epoch": 0.89, "grad_norm": 2.1493466075789187, "learning_rate": 8.231777316110245e-06, "loss": 0.1803, "step": 3275 }, { "epoch": 0.89, "grad_norm": 2.003315284589848, "learning_rate": 8.230652703624893e-06, "loss": 0.1557, "step": 3276 }, { "epoch": 0.89, "grad_norm": 2.043228461711188, "learning_rate": 8.229527810494682e-06, "loss": 0.1599, "step": 3277 }, { "epoch": 0.89, "grad_norm": 1.9786669354200614, "learning_rate": 8.228402636817331e-06, "loss": 0.1461, "step": 3278 }, { "epoch": 0.9, "grad_norm": 2.2086720767807426, "learning_rate": 8.227277182690582e-06, "loss": 0.1667, "step": 3279 }, { "epoch": 0.9, "grad_norm": 2.3602797058448477, "learning_rate": 8.226151448212202e-06, "loss": 0.1797, "step": 3280 }, { "epoch": 0.9, "grad_norm": 2.225265777224894, "learning_rate": 8.225025433479987e-06, "loss": 0.1833, "step": 3281 }, { "epoch": 0.9, "grad_norm": 2.2059809692911934, "learning_rate": 8.22389913859175e-06, "loss": 0.1687, "step": 3282 }, { "epoch": 0.9, "grad_norm": 2.159812376875455, "learning_rate": 8.222772563645329e-06, "loss": 0.184, "step": 3283 }, { "epoch": 0.9, "grad_norm": 2.228120479440467, "learning_rate": 8.221645708738594e-06, "loss": 0.1806, "step": 3284 }, { "epoch": 0.9, "grad_norm": 2.4583856589863826, "learning_rate": 8.220518573969432e-06, "loss": 0.1798, "step": 3285 }, { "epoch": 0.9, "grad_norm": 2.191022395410361, "learning_rate": 8.219391159435755e-06, "loss": 0.1828, "step": 3286 }, { "epoch": 0.9, "grad_norm": 2.356795918174125, "learning_rate": 8.218263465235502e-06, "loss": 0.2138, "step": 3287 }, { "epoch": 0.9, "grad_norm": 2.4955424270318827, "learning_rate": 8.217135491466636e-06, "loss": 0.1955, "step": 3288 }, { "epoch": 0.9, "grad_norm": 2.156001927303675, "learning_rate": 8.216007238227142e-06, "loss": 0.1834, "step": 3289 }, { "epoch": 0.9, "grad_norm": 2.1896506011684567, "learning_rate": 8.214878705615033e-06, "loss": 0.1682, "step": 3290 }, { "epoch": 0.9, "grad_norm": 2.382573553593165, "learning_rate": 8.213749893728342e-06, "loss": 0.1453, "step": 3291 }, { "epoch": 0.9, "grad_norm": 2.148992685201197, "learning_rate": 8.212620802665127e-06, "loss": 0.1795, "step": 3292 }, { "epoch": 0.9, "grad_norm": 2.251030436578612, "learning_rate": 8.211491432523474e-06, "loss": 0.1511, "step": 3293 }, { "epoch": 0.9, "grad_norm": 2.2823839686944583, "learning_rate": 8.210361783401491e-06, "loss": 0.187, "step": 3294 }, { "epoch": 0.9, "grad_norm": 2.2377797315918566, "learning_rate": 8.209231855397309e-06, "loss": 0.1606, "step": 3295 }, { "epoch": 0.9, "grad_norm": 2.3382576365493137, "learning_rate": 8.208101648609082e-06, "loss": 0.1841, "step": 3296 }, { "epoch": 0.9, "grad_norm": 2.235197394522637, "learning_rate": 8.206971163134992e-06, "loss": 0.1892, "step": 3297 }, { "epoch": 0.9, "grad_norm": 2.5282837116080947, "learning_rate": 8.205840399073245e-06, "loss": 0.183, "step": 3298 }, { "epoch": 0.9, "grad_norm": 2.4616697894816446, "learning_rate": 8.204709356522069e-06, "loss": 0.1895, "step": 3299 }, { "epoch": 0.9, "grad_norm": 2.434362636902406, "learning_rate": 8.203578035579716e-06, "loss": 0.1662, "step": 3300 }, { "epoch": 0.9, "grad_norm": 2.3371972078139636, "learning_rate": 8.202446436344463e-06, "loss": 0.1983, "step": 3301 }, { "epoch": 0.9, "grad_norm": 2.435275274808842, "learning_rate": 8.201314558914613e-06, "loss": 0.1979, "step": 3302 }, { "epoch": 0.9, "grad_norm": 2.4478602119627815, "learning_rate": 8.20018240338849e-06, "loss": 0.2396, "step": 3303 }, { "epoch": 0.9, "grad_norm": 2.625698385241079, "learning_rate": 8.199049969864445e-06, "loss": 0.2171, "step": 3304 }, { "epoch": 0.9, "grad_norm": 2.505762837331745, "learning_rate": 8.197917258440851e-06, "loss": 0.1549, "step": 3305 }, { "epoch": 0.9, "grad_norm": 2.0453110101659964, "learning_rate": 8.196784269216107e-06, "loss": 0.1789, "step": 3306 }, { "epoch": 0.9, "grad_norm": 2.2866626290225827, "learning_rate": 8.195651002288633e-06, "loss": 0.1898, "step": 3307 }, { "epoch": 0.9, "grad_norm": 2.283789599413208, "learning_rate": 8.194517457756877e-06, "loss": 0.1957, "step": 3308 }, { "epoch": 0.9, "grad_norm": 2.4412990804505323, "learning_rate": 8.193383635719308e-06, "loss": 0.1838, "step": 3309 }, { "epoch": 0.9, "grad_norm": 2.138029238240916, "learning_rate": 8.192249536274421e-06, "loss": 0.1817, "step": 3310 }, { "epoch": 0.9, "grad_norm": 2.324398080938735, "learning_rate": 8.191115159520735e-06, "loss": 0.211, "step": 3311 }, { "epoch": 0.9, "grad_norm": 2.2155668721038135, "learning_rate": 8.189980505556793e-06, "loss": 0.1935, "step": 3312 }, { "epoch": 0.9, "grad_norm": 2.4183581811832786, "learning_rate": 8.188845574481162e-06, "loss": 0.1902, "step": 3313 }, { "epoch": 0.9, "grad_norm": 2.38784633428729, "learning_rate": 8.187710366392431e-06, "loss": 0.1862, "step": 3314 }, { "epoch": 0.9, "grad_norm": 2.426471563918452, "learning_rate": 8.186574881389216e-06, "loss": 0.2169, "step": 3315 }, { "epoch": 0.91, "grad_norm": 2.6638006352140082, "learning_rate": 8.185439119570154e-06, "loss": 0.2402, "step": 3316 }, { "epoch": 0.91, "grad_norm": 2.112580427314221, "learning_rate": 8.184303081033911e-06, "loss": 0.1782, "step": 3317 }, { "epoch": 0.91, "grad_norm": 2.156329305632411, "learning_rate": 8.183166765879171e-06, "loss": 0.1624, "step": 3318 }, { "epoch": 0.91, "grad_norm": 2.425715856479946, "learning_rate": 8.182030174204648e-06, "loss": 0.222, "step": 3319 }, { "epoch": 0.91, "grad_norm": 2.2116814000944442, "learning_rate": 8.180893306109075e-06, "loss": 0.1789, "step": 3320 }, { "epoch": 0.91, "grad_norm": 2.359940770493828, "learning_rate": 8.179756161691212e-06, "loss": 0.1748, "step": 3321 }, { "epoch": 0.91, "grad_norm": 2.0130173846981254, "learning_rate": 8.178618741049841e-06, "loss": 0.1541, "step": 3322 }, { "epoch": 0.91, "grad_norm": 2.28094935573533, "learning_rate": 8.17748104428377e-06, "loss": 0.1834, "step": 3323 }, { "epoch": 0.91, "grad_norm": 2.3078207516293694, "learning_rate": 8.17634307149183e-06, "loss": 0.1969, "step": 3324 }, { "epoch": 0.91, "grad_norm": 2.2269701877737553, "learning_rate": 8.175204822772875e-06, "loss": 0.1831, "step": 3325 }, { "epoch": 0.91, "grad_norm": 2.2811932793283645, "learning_rate": 8.174066298225785e-06, "loss": 0.1894, "step": 3326 }, { "epoch": 0.91, "grad_norm": 2.157994459926913, "learning_rate": 8.172927497949463e-06, "loss": 0.1875, "step": 3327 }, { "epoch": 0.91, "grad_norm": 2.0926735948127244, "learning_rate": 8.171788422042837e-06, "loss": 0.1723, "step": 3328 }, { "epoch": 0.91, "grad_norm": 2.1976119524902717, "learning_rate": 8.170649070604855e-06, "loss": 0.1886, "step": 3329 }, { "epoch": 0.91, "grad_norm": 2.0687673978670937, "learning_rate": 8.169509443734493e-06, "loss": 0.1873, "step": 3330 }, { "epoch": 0.91, "grad_norm": 2.1869805702470875, "learning_rate": 8.16836954153075e-06, "loss": 0.1958, "step": 3331 }, { "epoch": 0.91, "grad_norm": 2.303212673810355, "learning_rate": 8.167229364092648e-06, "loss": 0.1681, "step": 3332 }, { "epoch": 0.91, "grad_norm": 2.2794315472409097, "learning_rate": 8.166088911519236e-06, "loss": 0.1804, "step": 3333 }, { "epoch": 0.91, "grad_norm": 2.182459423048527, "learning_rate": 8.16494818390958e-06, "loss": 0.1834, "step": 3334 }, { "epoch": 0.91, "grad_norm": 2.1105704925939714, "learning_rate": 8.163807181362778e-06, "loss": 0.1731, "step": 3335 }, { "epoch": 0.91, "grad_norm": 2.3344840179572337, "learning_rate": 8.162665903977947e-06, "loss": 0.2257, "step": 3336 }, { "epoch": 0.91, "grad_norm": 2.178533704416329, "learning_rate": 8.161524351854229e-06, "loss": 0.1764, "step": 3337 }, { "epoch": 0.91, "grad_norm": 2.2327794298956363, "learning_rate": 8.16038252509079e-06, "loss": 0.1864, "step": 3338 }, { "epoch": 0.91, "grad_norm": 2.22333040777405, "learning_rate": 8.15924042378682e-06, "loss": 0.1708, "step": 3339 }, { "epoch": 0.91, "grad_norm": 2.325766604148024, "learning_rate": 8.158098048041534e-06, "loss": 0.196, "step": 3340 }, { "epoch": 0.91, "grad_norm": 2.127296045212197, "learning_rate": 8.156955397954166e-06, "loss": 0.1844, "step": 3341 }, { "epoch": 0.91, "grad_norm": 2.2199497615834316, "learning_rate": 8.15581247362398e-06, "loss": 0.203, "step": 3342 }, { "epoch": 0.91, "grad_norm": 2.3653662688952943, "learning_rate": 8.154669275150259e-06, "loss": 0.1887, "step": 3343 }, { "epoch": 0.91, "grad_norm": 1.8745965289880373, "learning_rate": 8.153525802632314e-06, "loss": 0.1612, "step": 3344 }, { "epoch": 0.91, "grad_norm": 2.117495794730123, "learning_rate": 8.15238205616948e-06, "loss": 0.153, "step": 3345 }, { "epoch": 0.91, "grad_norm": 2.037321887315161, "learning_rate": 8.151238035861108e-06, "loss": 0.1584, "step": 3346 }, { "epoch": 0.91, "grad_norm": 2.196284899438368, "learning_rate": 8.150093741806582e-06, "loss": 0.1971, "step": 3347 }, { "epoch": 0.91, "grad_norm": 2.204382918961807, "learning_rate": 8.148949174105305e-06, "loss": 0.1787, "step": 3348 }, { "epoch": 0.91, "grad_norm": 2.092713372572776, "learning_rate": 8.147804332856705e-06, "loss": 0.162, "step": 3349 }, { "epoch": 0.91, "grad_norm": 2.3956935239184567, "learning_rate": 8.146659218160233e-06, "loss": 0.1915, "step": 3350 }, { "epoch": 0.91, "grad_norm": 2.046048622108676, "learning_rate": 8.145513830115367e-06, "loss": 0.194, "step": 3351 }, { "epoch": 0.92, "grad_norm": 2.298114634723322, "learning_rate": 8.144368168821603e-06, "loss": 0.1856, "step": 3352 }, { "epoch": 0.92, "grad_norm": 2.17841290441522, "learning_rate": 8.143222234378467e-06, "loss": 0.1747, "step": 3353 }, { "epoch": 0.92, "grad_norm": 2.191878202826198, "learning_rate": 8.142076026885504e-06, "loss": 0.1712, "step": 3354 }, { "epoch": 0.92, "grad_norm": 2.377293506076023, "learning_rate": 8.140929546442282e-06, "loss": 0.2064, "step": 3355 }, { "epoch": 0.92, "grad_norm": 2.1353715101449913, "learning_rate": 8.1397827931484e-06, "loss": 0.1638, "step": 3356 }, { "epoch": 0.92, "grad_norm": 2.33039694183237, "learning_rate": 8.13863576710347e-06, "loss": 0.1741, "step": 3357 }, { "epoch": 0.92, "grad_norm": 2.3258499626196913, "learning_rate": 8.13748846840714e-06, "loss": 0.2203, "step": 3358 }, { "epoch": 0.92, "grad_norm": 2.3207823728598234, "learning_rate": 8.136340897159071e-06, "loss": 0.1852, "step": 3359 }, { "epoch": 0.92, "grad_norm": 2.2941575772141247, "learning_rate": 8.135193053458952e-06, "loss": 0.1817, "step": 3360 }, { "epoch": 0.92, "grad_norm": 2.0603519800074728, "learning_rate": 8.134044937406496e-06, "loss": 0.1469, "step": 3361 }, { "epoch": 0.92, "grad_norm": 2.5186651452807243, "learning_rate": 8.13289654910144e-06, "loss": 0.1841, "step": 3362 }, { "epoch": 0.92, "grad_norm": 2.162276033773308, "learning_rate": 8.131747888643541e-06, "loss": 0.2057, "step": 3363 }, { "epoch": 0.92, "grad_norm": 1.9764896077994298, "learning_rate": 8.130598956132587e-06, "loss": 0.1523, "step": 3364 }, { "epoch": 0.92, "grad_norm": 2.330984831901305, "learning_rate": 8.129449751668382e-06, "loss": 0.1965, "step": 3365 }, { "epoch": 0.92, "grad_norm": 2.1995998810972517, "learning_rate": 8.128300275350756e-06, "loss": 0.1759, "step": 3366 }, { "epoch": 0.92, "grad_norm": 2.0550206928753614, "learning_rate": 8.127150527279565e-06, "loss": 0.1602, "step": 3367 }, { "epoch": 0.92, "grad_norm": 2.200589011616952, "learning_rate": 8.126000507554688e-06, "loss": 0.1838, "step": 3368 }, { "epoch": 0.92, "grad_norm": 2.1442422115987174, "learning_rate": 8.124850216276023e-06, "loss": 0.2015, "step": 3369 }, { "epoch": 0.92, "grad_norm": 2.3846688878144655, "learning_rate": 8.1236996535435e-06, "loss": 0.2376, "step": 3370 }, { "epoch": 0.92, "grad_norm": 2.262346544704719, "learning_rate": 8.122548819457063e-06, "loss": 0.1841, "step": 3371 }, { "epoch": 0.92, "grad_norm": 2.1317126817830725, "learning_rate": 8.121397714116686e-06, "loss": 0.1769, "step": 3372 }, { "epoch": 0.92, "grad_norm": 2.1110246032851303, "learning_rate": 8.120246337622364e-06, "loss": 0.1797, "step": 3373 }, { "epoch": 0.92, "grad_norm": 2.0968224383971763, "learning_rate": 8.119094690074119e-06, "loss": 0.1717, "step": 3374 }, { "epoch": 0.92, "grad_norm": 2.0633835379639307, "learning_rate": 8.117942771571992e-06, "loss": 0.1776, "step": 3375 }, { "epoch": 0.92, "grad_norm": 2.343492438496204, "learning_rate": 8.11679058221605e-06, "loss": 0.1941, "step": 3376 }, { "epoch": 0.92, "grad_norm": 2.1212228457088727, "learning_rate": 8.115638122106382e-06, "loss": 0.2008, "step": 3377 }, { "epoch": 0.92, "grad_norm": 2.1254516867786184, "learning_rate": 8.114485391343102e-06, "loss": 0.1267, "step": 3378 }, { "epoch": 0.92, "grad_norm": 2.2565782996683383, "learning_rate": 8.113332390026348e-06, "loss": 0.2063, "step": 3379 }, { "epoch": 0.92, "grad_norm": 2.08004835438089, "learning_rate": 8.11217911825628e-06, "loss": 0.1941, "step": 3380 }, { "epoch": 0.92, "grad_norm": 2.0275372993617298, "learning_rate": 8.11102557613308e-06, "loss": 0.166, "step": 3381 }, { "epoch": 0.92, "grad_norm": 2.3199502608216522, "learning_rate": 8.10987176375696e-06, "loss": 0.1903, "step": 3382 }, { "epoch": 0.92, "grad_norm": 2.0210505290996137, "learning_rate": 8.108717681228146e-06, "loss": 0.1744, "step": 3383 }, { "epoch": 0.92, "grad_norm": 3.0014804385007214, "learning_rate": 8.107563328646897e-06, "loss": 0.1987, "step": 3384 }, { "epoch": 0.92, "grad_norm": 2.2547966612495665, "learning_rate": 8.106408706113486e-06, "loss": 0.2141, "step": 3385 }, { "epoch": 0.92, "grad_norm": 2.1260768948327278, "learning_rate": 8.10525381372822e-06, "loss": 0.1909, "step": 3386 }, { "epoch": 0.92, "grad_norm": 1.9969525668783519, "learning_rate": 8.104098651591418e-06, "loss": 0.1608, "step": 3387 }, { "epoch": 0.92, "grad_norm": 2.231321357827387, "learning_rate": 8.102943219803433e-06, "loss": 0.1844, "step": 3388 }, { "epoch": 0.93, "grad_norm": 1.9586615143030417, "learning_rate": 8.101787518464634e-06, "loss": 0.1747, "step": 3389 }, { "epoch": 0.93, "grad_norm": 2.3204957236827197, "learning_rate": 8.100631547675417e-06, "loss": 0.198, "step": 3390 }, { "epoch": 0.93, "grad_norm": 2.053834824641304, "learning_rate": 8.0994753075362e-06, "loss": 0.168, "step": 3391 }, { "epoch": 0.93, "grad_norm": 2.1464494303607586, "learning_rate": 8.098318798147426e-06, "loss": 0.149, "step": 3392 }, { "epoch": 0.93, "grad_norm": 2.4694733791375962, "learning_rate": 8.097162019609562e-06, "loss": 0.2131, "step": 3393 }, { "epoch": 0.93, "grad_norm": 2.2703502085464873, "learning_rate": 8.09600497202309e-06, "loss": 0.1768, "step": 3394 }, { "epoch": 0.93, "grad_norm": 2.2156428920251434, "learning_rate": 8.094847655488528e-06, "loss": 0.2003, "step": 3395 }, { "epoch": 0.93, "grad_norm": 2.1656901559941146, "learning_rate": 8.09369007010641e-06, "loss": 0.1946, "step": 3396 }, { "epoch": 0.93, "grad_norm": 2.625584483631351, "learning_rate": 8.092532215977293e-06, "loss": 0.1912, "step": 3397 }, { "epoch": 0.93, "grad_norm": 2.58355268290637, "learning_rate": 8.09137409320176e-06, "loss": 0.2121, "step": 3398 }, { "epoch": 0.93, "grad_norm": 2.361937405251314, "learning_rate": 8.090215701880418e-06, "loss": 0.1655, "step": 3399 }, { "epoch": 0.93, "grad_norm": 1.9789854426305598, "learning_rate": 8.089057042113895e-06, "loss": 0.1502, "step": 3400 }, { "epoch": 0.93, "grad_norm": 2.2216580271250836, "learning_rate": 8.087898114002842e-06, "loss": 0.1795, "step": 3401 }, { "epoch": 0.93, "grad_norm": 2.0856964605391375, "learning_rate": 8.086738917647937e-06, "loss": 0.1378, "step": 3402 }, { "epoch": 0.93, "grad_norm": 2.020846775830208, "learning_rate": 8.085579453149874e-06, "loss": 0.1549, "step": 3403 }, { "epoch": 0.93, "grad_norm": 2.2020348895434685, "learning_rate": 8.084419720609377e-06, "loss": 0.1748, "step": 3404 }, { "epoch": 0.93, "grad_norm": 2.1278798565663997, "learning_rate": 8.083259720127195e-06, "loss": 0.1756, "step": 3405 }, { "epoch": 0.93, "grad_norm": 2.5077072419121653, "learning_rate": 8.082099451804093e-06, "loss": 0.1605, "step": 3406 }, { "epoch": 0.93, "grad_norm": 2.2672746401792714, "learning_rate": 8.080938915740863e-06, "loss": 0.1736, "step": 3407 }, { "epoch": 0.93, "grad_norm": 2.2788171992700734, "learning_rate": 8.079778112038318e-06, "loss": 0.1598, "step": 3408 }, { "epoch": 0.93, "grad_norm": 2.2866941428106684, "learning_rate": 8.078617040797304e-06, "loss": 0.1981, "step": 3409 }, { "epoch": 0.93, "grad_norm": 2.13636483216342, "learning_rate": 8.077455702118673e-06, "loss": 0.1578, "step": 3410 }, { "epoch": 0.93, "grad_norm": 2.2036606064180004, "learning_rate": 8.076294096103316e-06, "loss": 0.1739, "step": 3411 }, { "epoch": 0.93, "grad_norm": 2.043876503178284, "learning_rate": 8.075132222852138e-06, "loss": 0.1709, "step": 3412 }, { "epoch": 0.93, "grad_norm": 2.1230623903068997, "learning_rate": 8.073970082466071e-06, "loss": 0.1859, "step": 3413 }, { "epoch": 0.93, "grad_norm": 2.1414403624526486, "learning_rate": 8.072807675046073e-06, "loss": 0.1707, "step": 3414 }, { "epoch": 0.93, "grad_norm": 2.0688715663520507, "learning_rate": 8.071645000693116e-06, "loss": 0.1678, "step": 3415 }, { "epoch": 0.93, "grad_norm": 2.1794382570736275, "learning_rate": 8.070482059508202e-06, "loss": 0.2066, "step": 3416 }, { "epoch": 0.93, "grad_norm": 1.9796256375435533, "learning_rate": 8.069318851592358e-06, "loss": 0.1688, "step": 3417 }, { "epoch": 0.93, "grad_norm": 2.1099252518864406, "learning_rate": 8.068155377046629e-06, "loss": 0.1614, "step": 3418 }, { "epoch": 0.93, "grad_norm": 2.178794028016435, "learning_rate": 8.066991635972087e-06, "loss": 0.172, "step": 3419 }, { "epoch": 0.93, "grad_norm": 2.0753428201336597, "learning_rate": 8.065827628469823e-06, "loss": 0.1825, "step": 3420 }, { "epoch": 0.93, "grad_norm": 2.0694085117241268, "learning_rate": 8.064663354640956e-06, "loss": 0.159, "step": 3421 }, { "epoch": 0.93, "grad_norm": 2.328959460484489, "learning_rate": 8.063498814586623e-06, "loss": 0.1977, "step": 3422 }, { "epoch": 0.93, "grad_norm": 1.9669738818523599, "learning_rate": 8.06233400840799e-06, "loss": 0.159, "step": 3423 }, { "epoch": 0.93, "grad_norm": 2.1442275564750948, "learning_rate": 8.06116893620624e-06, "loss": 0.1666, "step": 3424 }, { "epoch": 0.94, "grad_norm": 2.2924947982463078, "learning_rate": 8.060003598082587e-06, "loss": 0.166, "step": 3425 }, { "epoch": 0.94, "grad_norm": 2.134976702211855, "learning_rate": 8.058837994138256e-06, "loss": 0.1493, "step": 3426 }, { "epoch": 0.94, "grad_norm": 2.2141414172383787, "learning_rate": 8.057672124474508e-06, "loss": 0.1676, "step": 3427 }, { "epoch": 0.94, "grad_norm": 2.212568054359841, "learning_rate": 8.05650598919262e-06, "loss": 0.1552, "step": 3428 }, { "epoch": 0.94, "grad_norm": 2.2876868730832722, "learning_rate": 8.055339588393892e-06, "loss": 0.1708, "step": 3429 }, { "epoch": 0.94, "grad_norm": 2.239595571033942, "learning_rate": 8.05417292217965e-06, "loss": 0.1888, "step": 3430 }, { "epoch": 0.94, "grad_norm": 2.070562564412396, "learning_rate": 8.053005990651242e-06, "loss": 0.1446, "step": 3431 }, { "epoch": 0.94, "grad_norm": 2.16333935420736, "learning_rate": 8.051838793910038e-06, "loss": 0.1709, "step": 3432 }, { "epoch": 0.94, "grad_norm": 2.1235719033835774, "learning_rate": 8.05067133205743e-06, "loss": 0.1644, "step": 3433 }, { "epoch": 0.94, "grad_norm": 2.2918465911043255, "learning_rate": 8.049503605194837e-06, "loss": 0.1693, "step": 3434 }, { "epoch": 0.94, "grad_norm": 2.2404753228145404, "learning_rate": 8.0483356134237e-06, "loss": 0.1726, "step": 3435 }, { "epoch": 0.94, "grad_norm": 2.0528987096696114, "learning_rate": 8.047167356845475e-06, "loss": 0.1615, "step": 3436 }, { "epoch": 0.94, "grad_norm": 2.0699111281915896, "learning_rate": 8.045998835561656e-06, "loss": 0.1408, "step": 3437 }, { "epoch": 0.94, "grad_norm": 2.2646101158294734, "learning_rate": 8.04483004967375e-06, "loss": 0.1653, "step": 3438 }, { "epoch": 0.94, "grad_norm": 2.211999081533399, "learning_rate": 8.043660999283282e-06, "loss": 0.1941, "step": 3439 }, { "epoch": 0.94, "grad_norm": 2.2064024177081976, "learning_rate": 8.042491684491816e-06, "loss": 0.1997, "step": 3440 }, { "epoch": 0.94, "grad_norm": 2.0289132652750563, "learning_rate": 8.041322105400923e-06, "loss": 0.1471, "step": 3441 }, { "epoch": 0.94, "grad_norm": 2.1208623405231006, "learning_rate": 8.040152262112206e-06, "loss": 0.1798, "step": 3442 }, { "epoch": 0.94, "grad_norm": 2.219163433886025, "learning_rate": 8.038982154727288e-06, "loss": 0.1723, "step": 3443 }, { "epoch": 0.94, "grad_norm": 2.2545709877848497, "learning_rate": 8.03781178334782e-06, "loss": 0.1752, "step": 3444 }, { "epoch": 0.94, "grad_norm": 2.314027216226458, "learning_rate": 8.036641148075463e-06, "loss": 0.2001, "step": 3445 }, { "epoch": 0.94, "grad_norm": 2.1919804614253366, "learning_rate": 8.035470249011916e-06, "loss": 0.1817, "step": 3446 }, { "epoch": 0.94, "grad_norm": 2.1350167097930974, "learning_rate": 8.034299086258892e-06, "loss": 0.1505, "step": 3447 }, { "epoch": 0.94, "grad_norm": 2.1082956861360125, "learning_rate": 8.03312765991813e-06, "loss": 0.1474, "step": 3448 }, { "epoch": 0.94, "grad_norm": 2.469382675126546, "learning_rate": 8.031955970091389e-06, "loss": 0.1539, "step": 3449 }, { "epoch": 0.94, "grad_norm": 2.347055738968163, "learning_rate": 8.030784016880456e-06, "loss": 0.1952, "step": 3450 }, { "epoch": 0.94, "grad_norm": 2.292347892682746, "learning_rate": 8.029611800387134e-06, "loss": 0.1701, "step": 3451 }, { "epoch": 0.94, "grad_norm": 2.36423604990879, "learning_rate": 8.028439320713256e-06, "loss": 0.1809, "step": 3452 }, { "epoch": 0.94, "grad_norm": 2.1421634728899854, "learning_rate": 8.027266577960676e-06, "loss": 0.1847, "step": 3453 }, { "epoch": 0.94, "grad_norm": 1.9370081267153783, "learning_rate": 8.026093572231266e-06, "loss": 0.1421, "step": 3454 }, { "epoch": 0.94, "grad_norm": 2.1827786032604166, "learning_rate": 8.024920303626925e-06, "loss": 0.1753, "step": 3455 }, { "epoch": 0.94, "grad_norm": 2.333584098516215, "learning_rate": 8.023746772249574e-06, "loss": 0.1992, "step": 3456 }, { "epoch": 0.94, "grad_norm": 2.0367031019980093, "learning_rate": 8.02257297820116e-06, "loss": 0.1793, "step": 3457 }, { "epoch": 0.94, "grad_norm": 2.26262519575056, "learning_rate": 8.021398921583644e-06, "loss": 0.1834, "step": 3458 }, { "epoch": 0.94, "grad_norm": 2.108617365234379, "learning_rate": 8.020224602499024e-06, "loss": 0.1538, "step": 3459 }, { "epoch": 0.94, "grad_norm": 2.4657179891321555, "learning_rate": 8.019050021049303e-06, "loss": 0.1867, "step": 3460 }, { "epoch": 0.94, "grad_norm": 1.9461986777864206, "learning_rate": 8.017875177336522e-06, "loss": 0.1787, "step": 3461 }, { "epoch": 0.95, "grad_norm": 1.9401714096142866, "learning_rate": 8.016700071462736e-06, "loss": 0.1564, "step": 3462 }, { "epoch": 0.95, "grad_norm": 2.1614582313937976, "learning_rate": 8.015524703530028e-06, "loss": 0.1725, "step": 3463 }, { "epoch": 0.95, "grad_norm": 2.2348289930688177, "learning_rate": 8.014349073640504e-06, "loss": 0.2069, "step": 3464 }, { "epoch": 0.95, "grad_norm": 2.0380167889262735, "learning_rate": 8.013173181896283e-06, "loss": 0.1764, "step": 3465 }, { "epoch": 0.95, "grad_norm": 1.9548494264634033, "learning_rate": 8.011997028399518e-06, "loss": 0.1635, "step": 3466 }, { "epoch": 0.95, "grad_norm": 2.183734264974981, "learning_rate": 8.010820613252383e-06, "loss": 0.1786, "step": 3467 }, { "epoch": 0.95, "grad_norm": 2.0667665607313888, "learning_rate": 8.00964393655707e-06, "loss": 0.1738, "step": 3468 }, { "epoch": 0.95, "grad_norm": 2.2817955488929207, "learning_rate": 8.008466998415795e-06, "loss": 0.1822, "step": 3469 }, { "epoch": 0.95, "grad_norm": 2.1144768277086348, "learning_rate": 8.0072897989308e-06, "loss": 0.1683, "step": 3470 }, { "epoch": 0.95, "grad_norm": 2.283772571639778, "learning_rate": 8.006112338204348e-06, "loss": 0.2007, "step": 3471 }, { "epoch": 0.95, "grad_norm": 2.0776714690143367, "learning_rate": 8.004934616338721e-06, "loss": 0.1766, "step": 3472 }, { "epoch": 0.95, "grad_norm": 2.3441820787104968, "learning_rate": 8.003756633436233e-06, "loss": 0.2139, "step": 3473 }, { "epoch": 0.95, "grad_norm": 2.106388984871136, "learning_rate": 8.002578389599208e-06, "loss": 0.1665, "step": 3474 }, { "epoch": 0.95, "grad_norm": 2.155528158851314, "learning_rate": 8.001399884930004e-06, "loss": 0.1392, "step": 3475 }, { "epoch": 0.95, "grad_norm": 2.4168729617208884, "learning_rate": 8.000221119530993e-06, "loss": 0.2105, "step": 3476 }, { "epoch": 0.95, "grad_norm": 1.7080133395427586, "learning_rate": 7.999042093504578e-06, "loss": 0.1258, "step": 3477 }, { "epoch": 0.95, "grad_norm": 1.9973846088719347, "learning_rate": 7.997862806953177e-06, "loss": 0.1594, "step": 3478 }, { "epoch": 0.95, "grad_norm": 2.198990238001898, "learning_rate": 7.996683259979237e-06, "loss": 0.1759, "step": 3479 }, { "epoch": 0.95, "grad_norm": 2.1789223569845575, "learning_rate": 7.99550345268522e-06, "loss": 0.1672, "step": 3480 }, { "epoch": 0.95, "grad_norm": 2.6053157771372377, "learning_rate": 7.994323385173618e-06, "loss": 0.1711, "step": 3481 }, { "epoch": 0.95, "grad_norm": 2.1243318283349617, "learning_rate": 7.993143057546943e-06, "loss": 0.1467, "step": 3482 }, { "epoch": 0.95, "grad_norm": 2.3651409279697218, "learning_rate": 7.99196246990773e-06, "loss": 0.1786, "step": 3483 }, { "epoch": 0.95, "grad_norm": 2.1039675744831974, "learning_rate": 7.990781622358535e-06, "loss": 0.1657, "step": 3484 }, { "epoch": 0.95, "grad_norm": 2.3422274905057185, "learning_rate": 7.989600515001936e-06, "loss": 0.1951, "step": 3485 }, { "epoch": 0.95, "grad_norm": 2.2320673052169613, "learning_rate": 7.988419147940538e-06, "loss": 0.1974, "step": 3486 }, { "epoch": 0.95, "grad_norm": 2.571505983897901, "learning_rate": 7.987237521276962e-06, "loss": 0.2343, "step": 3487 }, { "epoch": 0.95, "grad_norm": 1.8619746740621699, "learning_rate": 7.986055635113859e-06, "loss": 0.1595, "step": 3488 }, { "epoch": 0.95, "grad_norm": 2.0890751586542735, "learning_rate": 7.984873489553896e-06, "loss": 0.2056, "step": 3489 }, { "epoch": 0.95, "grad_norm": 2.3582593618879937, "learning_rate": 7.983691084699768e-06, "loss": 0.1986, "step": 3490 }, { "epoch": 0.95, "grad_norm": 2.2436576397191965, "learning_rate": 7.982508420654187e-06, "loss": 0.2073, "step": 3491 }, { "epoch": 0.95, "grad_norm": 2.079998338064337, "learning_rate": 7.981325497519892e-06, "loss": 0.1919, "step": 3492 }, { "epoch": 0.95, "grad_norm": 1.931521167618295, "learning_rate": 7.980142315399641e-06, "loss": 0.1518, "step": 3493 }, { "epoch": 0.95, "grad_norm": 2.0978927386404465, "learning_rate": 7.978958874396219e-06, "loss": 0.1746, "step": 3494 }, { "epoch": 0.95, "grad_norm": 2.2262282445467982, "learning_rate": 7.977775174612427e-06, "loss": 0.1991, "step": 3495 }, { "epoch": 0.95, "grad_norm": 2.1060791843223274, "learning_rate": 7.976591216151097e-06, "loss": 0.1791, "step": 3496 }, { "epoch": 0.95, "grad_norm": 2.449195918529177, "learning_rate": 7.975406999115077e-06, "loss": 0.1755, "step": 3497 }, { "epoch": 0.95, "grad_norm": 2.180690228956373, "learning_rate": 7.974222523607236e-06, "loss": 0.1876, "step": 3498 }, { "epoch": 0.96, "grad_norm": 2.0838083420367175, "learning_rate": 7.973037789730473e-06, "loss": 0.1881, "step": 3499 }, { "epoch": 0.96, "grad_norm": 2.1994620223233645, "learning_rate": 7.971852797587703e-06, "loss": 0.1827, "step": 3500 }, { "epoch": 0.96, "grad_norm": 2.1028586358959234, "learning_rate": 7.970667547281864e-06, "loss": 0.1656, "step": 3501 }, { "epoch": 0.96, "grad_norm": 2.001220486504472, "learning_rate": 7.969482038915924e-06, "loss": 0.1622, "step": 3502 }, { "epoch": 0.96, "grad_norm": 2.593041833952668, "learning_rate": 7.968296272592862e-06, "loss": 0.1961, "step": 3503 }, { "epoch": 0.96, "grad_norm": 2.2221717220322943, "learning_rate": 7.967110248415684e-06, "loss": 0.1651, "step": 3504 }, { "epoch": 0.96, "grad_norm": 2.0717810340002614, "learning_rate": 7.965923966487423e-06, "loss": 0.1623, "step": 3505 }, { "epoch": 0.96, "grad_norm": 2.1646505978563892, "learning_rate": 7.964737426911129e-06, "loss": 0.203, "step": 3506 }, { "epoch": 0.96, "grad_norm": 2.079734266405713, "learning_rate": 7.963550629789875e-06, "loss": 0.1835, "step": 3507 }, { "epoch": 0.96, "grad_norm": 2.2339854210790286, "learning_rate": 7.962363575226762e-06, "loss": 0.1775, "step": 3508 }, { "epoch": 0.96, "grad_norm": 2.1768851108290446, "learning_rate": 7.961176263324902e-06, "loss": 0.153, "step": 3509 }, { "epoch": 0.96, "grad_norm": 2.299772264652932, "learning_rate": 7.959988694187438e-06, "loss": 0.1692, "step": 3510 }, { "epoch": 0.96, "grad_norm": 2.274121660579695, "learning_rate": 7.958800867917536e-06, "loss": 0.1562, "step": 3511 }, { "epoch": 0.96, "grad_norm": 2.1210062952820934, "learning_rate": 7.95761278461838e-06, "loss": 0.1912, "step": 3512 }, { "epoch": 0.96, "grad_norm": 2.3986542287361936, "learning_rate": 7.956424444393179e-06, "loss": 0.1834, "step": 3513 }, { "epoch": 0.96, "grad_norm": 2.0478773258113043, "learning_rate": 7.955235847345162e-06, "loss": 0.1727, "step": 3514 }, { "epoch": 0.96, "grad_norm": 2.510867699445433, "learning_rate": 7.954046993577585e-06, "loss": 0.1494, "step": 3515 }, { "epoch": 0.96, "grad_norm": 2.3300426179512552, "learning_rate": 7.952857883193716e-06, "loss": 0.1902, "step": 3516 }, { "epoch": 0.96, "grad_norm": 2.3439448304895225, "learning_rate": 7.95166851629686e-06, "loss": 0.162, "step": 3517 }, { "epoch": 0.96, "grad_norm": 1.8758526820004597, "learning_rate": 7.950478892990334e-06, "loss": 0.1461, "step": 3518 }, { "epoch": 0.96, "grad_norm": 2.1069712167874437, "learning_rate": 7.949289013377476e-06, "loss": 0.1515, "step": 3519 }, { "epoch": 0.96, "grad_norm": 2.2333934607562704, "learning_rate": 7.948098877561657e-06, "loss": 0.1502, "step": 3520 }, { "epoch": 0.96, "grad_norm": 2.108495201748499, "learning_rate": 7.946908485646256e-06, "loss": 0.1855, "step": 3521 }, { "epoch": 0.96, "grad_norm": 2.0846383761194605, "learning_rate": 7.945717837734688e-06, "loss": 0.1761, "step": 3522 }, { "epoch": 0.96, "grad_norm": 2.023161732682581, "learning_rate": 7.94452693393038e-06, "loss": 0.1369, "step": 3523 }, { "epoch": 0.96, "grad_norm": 1.9717515278406028, "learning_rate": 7.943335774336788e-06, "loss": 0.1721, "step": 3524 }, { "epoch": 0.96, "grad_norm": 2.0739791365240667, "learning_rate": 7.942144359057385e-06, "loss": 0.1798, "step": 3525 }, { "epoch": 0.96, "grad_norm": 3.4664264572135206, "learning_rate": 7.940952688195668e-06, "loss": 0.135, "step": 3526 }, { "epoch": 0.96, "grad_norm": 2.218813882567977, "learning_rate": 7.93976076185516e-06, "loss": 0.1685, "step": 3527 }, { "epoch": 0.96, "grad_norm": 2.1221146363517813, "learning_rate": 7.9385685801394e-06, "loss": 0.1751, "step": 3528 }, { "epoch": 0.96, "grad_norm": 2.246879673976531, "learning_rate": 7.937376143151952e-06, "loss": 0.1625, "step": 3529 }, { "epoch": 0.96, "grad_norm": 1.8532174437228135, "learning_rate": 7.936183450996402e-06, "loss": 0.1496, "step": 3530 }, { "epoch": 0.96, "grad_norm": 2.2308435776722457, "learning_rate": 7.934990503776363e-06, "loss": 0.1859, "step": 3531 }, { "epoch": 0.96, "grad_norm": 2.0046124455969823, "learning_rate": 7.933797301595461e-06, "loss": 0.1747, "step": 3532 }, { "epoch": 0.96, "grad_norm": 2.2308768703659925, "learning_rate": 7.93260384455735e-06, "loss": 0.2035, "step": 3533 }, { "epoch": 0.96, "grad_norm": 2.2799449370538554, "learning_rate": 7.931410132765705e-06, "loss": 0.1943, "step": 3534 }, { "epoch": 0.97, "grad_norm": 2.0629483766815184, "learning_rate": 7.930216166324222e-06, "loss": 0.1684, "step": 3535 }, { "epoch": 0.97, "grad_norm": 2.101585602817571, "learning_rate": 7.929021945336622e-06, "loss": 0.1586, "step": 3536 }, { "epoch": 0.97, "grad_norm": 2.395470517501492, "learning_rate": 7.927827469906646e-06, "loss": 0.2002, "step": 3537 }, { "epoch": 0.97, "grad_norm": 2.0733325071509, "learning_rate": 7.926632740138056e-06, "loss": 0.1973, "step": 3538 }, { "epoch": 0.97, "grad_norm": 2.173906098320224, "learning_rate": 7.925437756134638e-06, "loss": 0.1849, "step": 3539 }, { "epoch": 0.97, "grad_norm": 2.0835824339257663, "learning_rate": 7.9242425180002e-06, "loss": 0.1991, "step": 3540 }, { "epoch": 0.97, "grad_norm": 2.178607895657494, "learning_rate": 7.923047025838573e-06, "loss": 0.184, "step": 3541 }, { "epoch": 0.97, "grad_norm": 1.8557686680474144, "learning_rate": 7.921851279753606e-06, "loss": 0.1478, "step": 3542 }, { "epoch": 0.97, "grad_norm": 2.226527726189626, "learning_rate": 7.920655279849173e-06, "loss": 0.1731, "step": 3543 }, { "epoch": 0.97, "grad_norm": 2.263842902523124, "learning_rate": 7.91945902622917e-06, "loss": 0.164, "step": 3544 }, { "epoch": 0.97, "grad_norm": 2.222812893933422, "learning_rate": 7.918262518997517e-06, "loss": 0.1633, "step": 3545 }, { "epoch": 0.97, "grad_norm": 2.345809616476078, "learning_rate": 7.917065758258152e-06, "loss": 0.1384, "step": 3546 }, { "epoch": 0.97, "grad_norm": 2.1612431231219293, "learning_rate": 7.915868744115036e-06, "loss": 0.2035, "step": 3547 }, { "epoch": 0.97, "grad_norm": 2.1440309679864504, "learning_rate": 7.914671476672156e-06, "loss": 0.1661, "step": 3548 }, { "epoch": 0.97, "grad_norm": 2.1923003326283714, "learning_rate": 7.913473956033515e-06, "loss": 0.1897, "step": 3549 }, { "epoch": 0.97, "grad_norm": 2.373787317714028, "learning_rate": 7.912276182303142e-06, "loss": 0.1977, "step": 3550 }, { "epoch": 0.97, "grad_norm": 2.297594420104452, "learning_rate": 7.911078155585086e-06, "loss": 0.186, "step": 3551 }, { "epoch": 0.97, "grad_norm": 2.324221234412241, "learning_rate": 7.909879875983422e-06, "loss": 0.1742, "step": 3552 }, { "epoch": 0.97, "grad_norm": 1.891183263793428, "learning_rate": 7.90868134360224e-06, "loss": 0.1526, "step": 3553 }, { "epoch": 0.97, "grad_norm": 2.1037199362594383, "learning_rate": 7.907482558545656e-06, "loss": 0.1878, "step": 3554 }, { "epoch": 0.97, "grad_norm": 2.0096736546789993, "learning_rate": 7.90628352091781e-06, "loss": 0.1553, "step": 3555 }, { "epoch": 0.97, "grad_norm": 2.031641656027865, "learning_rate": 7.90508423082286e-06, "loss": 0.1565, "step": 3556 }, { "epoch": 0.97, "grad_norm": 2.185495313637928, "learning_rate": 7.90388468836499e-06, "loss": 0.1759, "step": 3557 }, { "epoch": 0.97, "grad_norm": 2.057603718256187, "learning_rate": 7.9026848936484e-06, "loss": 0.1663, "step": 3558 }, { "epoch": 0.97, "grad_norm": 2.0721491998024595, "learning_rate": 7.901484846777318e-06, "loss": 0.1807, "step": 3559 }, { "epoch": 0.97, "grad_norm": 2.052862677469006, "learning_rate": 7.900284547855992e-06, "loss": 0.1592, "step": 3560 }, { "epoch": 0.97, "grad_norm": 2.308106480239381, "learning_rate": 7.899083996988688e-06, "loss": 0.185, "step": 3561 }, { "epoch": 0.97, "grad_norm": 2.2031022737955976, "learning_rate": 7.8978831942797e-06, "loss": 0.1883, "step": 3562 }, { "epoch": 0.97, "grad_norm": 2.0664145451726816, "learning_rate": 7.89668213983334e-06, "loss": 0.1854, "step": 3563 }, { "epoch": 0.97, "grad_norm": 2.252339097079376, "learning_rate": 7.895480833753942e-06, "loss": 0.1899, "step": 3564 }, { "epoch": 0.97, "grad_norm": 2.2414139940806104, "learning_rate": 7.894279276145864e-06, "loss": 0.1687, "step": 3565 }, { "epoch": 0.97, "grad_norm": 2.2418496338234584, "learning_rate": 7.893077467113484e-06, "loss": 0.2083, "step": 3566 }, { "epoch": 0.97, "grad_norm": 2.0779303500377067, "learning_rate": 7.891875406761203e-06, "loss": 0.1646, "step": 3567 }, { "epoch": 0.97, "grad_norm": 2.5537845178608345, "learning_rate": 7.890673095193444e-06, "loss": 0.2314, "step": 3568 }, { "epoch": 0.97, "grad_norm": 2.1283538997559455, "learning_rate": 7.889470532514648e-06, "loss": 0.1805, "step": 3569 }, { "epoch": 0.97, "grad_norm": 2.085581684336599, "learning_rate": 7.888267718829283e-06, "loss": 0.1738, "step": 3570 }, { "epoch": 0.97, "grad_norm": 2.2679729945442935, "learning_rate": 7.887064654241837e-06, "loss": 0.1628, "step": 3571 }, { "epoch": 0.98, "grad_norm": 2.0552760794668705, "learning_rate": 7.88586133885682e-06, "loss": 0.1581, "step": 3572 }, { "epoch": 0.98, "grad_norm": 2.2094837558584857, "learning_rate": 7.884657772778761e-06, "loss": 0.1704, "step": 3573 }, { "epoch": 0.98, "grad_norm": 1.7712533441270153, "learning_rate": 7.883453956112215e-06, "loss": 0.1429, "step": 3574 }, { "epoch": 0.98, "grad_norm": 2.2793545454710697, "learning_rate": 7.882249888961755e-06, "loss": 0.183, "step": 3575 }, { "epoch": 0.98, "grad_norm": 2.254728717853408, "learning_rate": 7.881045571431982e-06, "loss": 0.1751, "step": 3576 }, { "epoch": 0.98, "grad_norm": 1.9783315134459758, "learning_rate": 7.87984100362751e-06, "loss": 0.146, "step": 3577 }, { "epoch": 0.98, "grad_norm": 2.2016207543503645, "learning_rate": 7.878636185652977e-06, "loss": 0.1843, "step": 3578 }, { "epoch": 0.98, "grad_norm": 2.393689134300773, "learning_rate": 7.87743111761305e-06, "loss": 0.179, "step": 3579 }, { "epoch": 0.98, "grad_norm": 2.4130954387645085, "learning_rate": 7.876225799612413e-06, "loss": 0.1879, "step": 3580 }, { "epoch": 0.98, "grad_norm": 2.382150322873826, "learning_rate": 7.875020231755766e-06, "loss": 0.1866, "step": 3581 }, { "epoch": 0.98, "grad_norm": 2.1816127050711116, "learning_rate": 7.87381441414784e-06, "loss": 0.1699, "step": 3582 }, { "epoch": 0.98, "grad_norm": 2.0412915989688094, "learning_rate": 7.872608346893384e-06, "loss": 0.1451, "step": 3583 }, { "epoch": 0.98, "grad_norm": 2.1345226653301457, "learning_rate": 7.871402030097164e-06, "loss": 0.1699, "step": 3584 }, { "epoch": 0.98, "grad_norm": 2.2619096872164257, "learning_rate": 7.870195463863976e-06, "loss": 0.205, "step": 3585 }, { "epoch": 0.98, "grad_norm": 2.3620466324276643, "learning_rate": 7.868988648298632e-06, "loss": 0.2061, "step": 3586 }, { "epoch": 0.98, "grad_norm": 2.195944428029587, "learning_rate": 7.867781583505968e-06, "loss": 0.2029, "step": 3587 }, { "epoch": 0.98, "grad_norm": 2.085782909047239, "learning_rate": 7.866574269590842e-06, "loss": 0.1839, "step": 3588 }, { "epoch": 0.98, "grad_norm": 2.114297515579239, "learning_rate": 7.86536670665813e-06, "loss": 0.1631, "step": 3589 }, { "epoch": 0.98, "grad_norm": 2.4204376026162704, "learning_rate": 7.864158894812734e-06, "loss": 0.1874, "step": 3590 }, { "epoch": 0.98, "grad_norm": 2.0932578100402917, "learning_rate": 7.862950834159577e-06, "loss": 0.1759, "step": 3591 }, { "epoch": 0.98, "grad_norm": 1.9257234083304198, "learning_rate": 7.8617425248036e-06, "loss": 0.1529, "step": 3592 }, { "epoch": 0.98, "grad_norm": 1.8362723308079476, "learning_rate": 7.86053396684977e-06, "loss": 0.1658, "step": 3593 }, { "epoch": 0.98, "grad_norm": 2.092559604046123, "learning_rate": 7.859325160403073e-06, "loss": 0.1885, "step": 3594 }, { "epoch": 0.98, "grad_norm": 2.08425924056734, "learning_rate": 7.858116105568515e-06, "loss": 0.1631, "step": 3595 }, { "epoch": 0.98, "grad_norm": 2.115557562049383, "learning_rate": 7.856906802451129e-06, "loss": 0.1911, "step": 3596 }, { "epoch": 0.98, "grad_norm": 2.0559289771974267, "learning_rate": 7.855697251155967e-06, "loss": 0.1761, "step": 3597 }, { "epoch": 0.98, "grad_norm": 1.9382733345003595, "learning_rate": 7.8544874517881e-06, "loss": 0.1667, "step": 3598 }, { "epoch": 0.98, "grad_norm": 2.187024826977775, "learning_rate": 7.853277404452622e-06, "loss": 0.1533, "step": 3599 }, { "epoch": 0.98, "grad_norm": 2.4844125425681365, "learning_rate": 7.85206710925465e-06, "loss": 0.2034, "step": 3600 }, { "epoch": 0.98, "grad_norm": 2.2583852699595086, "learning_rate": 7.850856566299326e-06, "loss": 0.187, "step": 3601 }, { "epoch": 0.98, "grad_norm": 2.240428346124938, "learning_rate": 7.8496457756918e-06, "loss": 0.1903, "step": 3602 }, { "epoch": 0.98, "grad_norm": 2.41276433076512, "learning_rate": 7.848434737537258e-06, "loss": 0.2157, "step": 3603 }, { "epoch": 0.98, "grad_norm": 2.23616051730908, "learning_rate": 7.847223451940903e-06, "loss": 0.2073, "step": 3604 }, { "epoch": 0.98, "grad_norm": 2.3841639453354038, "learning_rate": 7.846011919007958e-06, "loss": 0.1879, "step": 3605 }, { "epoch": 0.98, "grad_norm": 2.238312520591959, "learning_rate": 7.844800138843667e-06, "loss": 0.1883, "step": 3606 }, { "epoch": 0.98, "grad_norm": 1.9405395143229405, "learning_rate": 7.843588111553297e-06, "loss": 0.1697, "step": 3607 }, { "epoch": 0.98, "grad_norm": 1.9771168877083594, "learning_rate": 7.842375837242135e-06, "loss": 0.1653, "step": 3608 }, { "epoch": 0.99, "grad_norm": 1.9357223547452334, "learning_rate": 7.841163316015495e-06, "loss": 0.1664, "step": 3609 }, { "epoch": 0.99, "grad_norm": 1.9883664464605586, "learning_rate": 7.839950547978701e-06, "loss": 0.1646, "step": 3610 }, { "epoch": 0.99, "grad_norm": 2.1696297046698176, "learning_rate": 7.838737533237111e-06, "loss": 0.1842, "step": 3611 }, { "epoch": 0.99, "grad_norm": 2.084704948118195, "learning_rate": 7.837524271896097e-06, "loss": 0.1775, "step": 3612 }, { "epoch": 0.99, "grad_norm": 2.306153241843188, "learning_rate": 7.836310764061054e-06, "loss": 0.2025, "step": 3613 }, { "epoch": 0.99, "grad_norm": 2.0841266268660177, "learning_rate": 7.8350970098374e-06, "loss": 0.1546, "step": 3614 }, { "epoch": 0.99, "grad_norm": 2.169456772883008, "learning_rate": 7.833883009330573e-06, "loss": 0.1683, "step": 3615 }, { "epoch": 0.99, "grad_norm": 2.0801329041012435, "learning_rate": 7.832668762646027e-06, "loss": 0.1588, "step": 3616 }, { "epoch": 0.99, "grad_norm": 2.4326142118595597, "learning_rate": 7.831454269889251e-06, "loss": 0.2003, "step": 3617 }, { "epoch": 0.99, "grad_norm": 2.068686739382579, "learning_rate": 7.830239531165744e-06, "loss": 0.1649, "step": 3618 }, { "epoch": 0.99, "grad_norm": 2.2495280147914, "learning_rate": 7.829024546581028e-06, "loss": 0.1607, "step": 3619 }, { "epoch": 0.99, "grad_norm": 2.106744303449247, "learning_rate": 7.82780931624065e-06, "loss": 0.1745, "step": 3620 }, { "epoch": 0.99, "grad_norm": 1.954901983198919, "learning_rate": 7.826593840250175e-06, "loss": 0.1496, "step": 3621 }, { "epoch": 0.99, "grad_norm": 2.2596144770210866, "learning_rate": 7.825378118715192e-06, "loss": 0.2083, "step": 3622 }, { "epoch": 0.99, "grad_norm": 2.2979179680004824, "learning_rate": 7.824162151741309e-06, "loss": 0.1792, "step": 3623 }, { "epoch": 0.99, "grad_norm": 2.5728516987634054, "learning_rate": 7.822945939434156e-06, "loss": 0.1467, "step": 3624 }, { "epoch": 0.99, "grad_norm": 2.32380576821315, "learning_rate": 7.821729481899388e-06, "loss": 0.1801, "step": 3625 }, { "epoch": 0.99, "grad_norm": 2.341874849029923, "learning_rate": 7.820512779242673e-06, "loss": 0.1856, "step": 3626 }, { "epoch": 0.99, "grad_norm": 1.9082342384228124, "learning_rate": 7.819295831569708e-06, "loss": 0.1423, "step": 3627 }, { "epoch": 0.99, "grad_norm": 2.057660440735419, "learning_rate": 7.818078638986208e-06, "loss": 0.1851, "step": 3628 }, { "epoch": 0.99, "grad_norm": 1.9273872153792628, "learning_rate": 7.81686120159791e-06, "loss": 0.1606, "step": 3629 }, { "epoch": 0.99, "grad_norm": 2.275162468742561, "learning_rate": 7.815643519510571e-06, "loss": 0.1723, "step": 3630 }, { "epoch": 0.99, "grad_norm": 3.2205730258665675, "learning_rate": 7.81442559282997e-06, "loss": 0.1975, "step": 3631 }, { "epoch": 0.99, "grad_norm": 2.3172581151458465, "learning_rate": 7.813207421661911e-06, "loss": 0.2031, "step": 3632 }, { "epoch": 0.99, "grad_norm": 2.0986324998598818, "learning_rate": 7.811989006112212e-06, "loss": 0.1582, "step": 3633 }, { "epoch": 0.99, "grad_norm": 2.074398153701755, "learning_rate": 7.81077034628672e-06, "loss": 0.1678, "step": 3634 }, { "epoch": 0.99, "grad_norm": 2.288249927364208, "learning_rate": 7.809551442291294e-06, "loss": 0.1947, "step": 3635 }, { "epoch": 0.99, "grad_norm": 2.079007515606117, "learning_rate": 7.808332294231824e-06, "loss": 0.1763, "step": 3636 }, { "epoch": 0.99, "grad_norm": 1.7444197919171367, "learning_rate": 7.807112902214213e-06, "loss": 0.1334, "step": 3637 }, { "epoch": 0.99, "grad_norm": 2.5377783164990193, "learning_rate": 7.805893266344393e-06, "loss": 0.1846, "step": 3638 }, { "epoch": 0.99, "grad_norm": 1.9755284217690599, "learning_rate": 7.80467338672831e-06, "loss": 0.1755, "step": 3639 }, { "epoch": 0.99, "grad_norm": 2.1564631769413003, "learning_rate": 7.803453263471933e-06, "loss": 0.1971, "step": 3640 }, { "epoch": 0.99, "grad_norm": 2.166983344278988, "learning_rate": 7.802232896681259e-06, "loss": 0.1427, "step": 3641 }, { "epoch": 0.99, "grad_norm": 2.0767136281066185, "learning_rate": 7.801012286462294e-06, "loss": 0.1546, "step": 3642 }, { "epoch": 0.99, "grad_norm": 2.3473963684789525, "learning_rate": 7.799791432921075e-06, "loss": 0.1734, "step": 3643 }, { "epoch": 0.99, "grad_norm": 2.321964229409665, "learning_rate": 7.798570336163658e-06, "loss": 0.1905, "step": 3644 }, { "epoch": 1.0, "grad_norm": 2.269420383017954, "learning_rate": 7.797348996296116e-06, "loss": 0.1738, "step": 3645 }, { "epoch": 1.0, "grad_norm": 2.2162877772274028, "learning_rate": 7.796127413424547e-06, "loss": 0.1634, "step": 3646 }, { "epoch": 1.0, "grad_norm": 2.1415424951487454, "learning_rate": 7.794905587655071e-06, "loss": 0.1677, "step": 3647 }, { "epoch": 1.0, "grad_norm": 2.2387061876461987, "learning_rate": 7.793683519093825e-06, "loss": 0.1937, "step": 3648 }, { "epoch": 1.0, "grad_norm": 2.595956527584737, "learning_rate": 7.79246120784697e-06, "loss": 0.1937, "step": 3649 }, { "epoch": 1.0, "grad_norm": 2.222507791529048, "learning_rate": 7.791238654020686e-06, "loss": 0.1689, "step": 3650 }, { "epoch": 1.0, "grad_norm": 2.040363762646887, "learning_rate": 7.79001585772118e-06, "loss": 0.1563, "step": 3651 }, { "epoch": 1.0, "grad_norm": 2.1002846408100044, "learning_rate": 7.788792819054672e-06, "loss": 0.1717, "step": 3652 }, { "epoch": 1.0, "grad_norm": 1.9722218800934836, "learning_rate": 7.787569538127406e-06, "loss": 0.1284, "step": 3653 }, { "epoch": 1.0, "grad_norm": 2.18165286904065, "learning_rate": 7.78634601504565e-06, "loss": 0.1586, "step": 3654 }, { "epoch": 1.0, "grad_norm": 1.9558197607387153, "learning_rate": 7.785122249915688e-06, "loss": 0.1391, "step": 3655 }, { "epoch": 1.0, "grad_norm": 2.02086360664275, "learning_rate": 7.783898242843832e-06, "loss": 0.1724, "step": 3656 }, { "epoch": 1.0, "grad_norm": 2.2701007232384374, "learning_rate": 7.782673993936408e-06, "loss": 0.1975, "step": 3657 }, { "epoch": 1.0, "grad_norm": 2.23036440140428, "learning_rate": 7.781449503299764e-06, "loss": 0.1932, "step": 3658 }, { "epoch": 1.0, "grad_norm": 2.2008271452439723, "learning_rate": 7.780224771040275e-06, "loss": 0.1758, "step": 3659 }, { "epoch": 1.0, "grad_norm": 2.0738143177534427, "learning_rate": 7.77899979726433e-06, "loss": 0.1504, "step": 3660 }, { "epoch": 1.0, "grad_norm": 2.315121437885548, "learning_rate": 7.777774582078342e-06, "loss": 0.212, "step": 3661 }, { "epoch": 1.0, "grad_norm": 2.262291693554671, "learning_rate": 7.776549125588743e-06, "loss": 0.1997, "step": 3662 }, { "epoch": 1.0, "grad_norm": 2.0893907627131463, "learning_rate": 7.775323427901993e-06, "loss": 0.1618, "step": 3663 }, { "epoch": 1.0, "grad_norm": 1.8423042655805577, "learning_rate": 7.774097489124562e-06, "loss": 0.1244, "step": 3664 }, { "epoch": 1.0, "grad_norm": 1.8418254739445166, "learning_rate": 7.77287130936295e-06, "loss": 0.1253, "step": 3665 }, { "epoch": 1.0, "grad_norm": 1.657948802551241, "learning_rate": 7.771644888723672e-06, "loss": 0.1314, "step": 3666 }, { "epoch": 1.0, "grad_norm": 1.9144428569459166, "learning_rate": 7.77041822731327e-06, "loss": 0.1279, "step": 3667 }, { "epoch": 1.0, "grad_norm": 2.2205524232330776, "learning_rate": 7.7691913252383e-06, "loss": 0.1421, "step": 3668 }, { "epoch": 1.0, "grad_norm": 1.9555913677668197, "learning_rate": 7.767964182605344e-06, "loss": 0.1336, "step": 3669 }, { "epoch": 1.0, "grad_norm": 2.13653645398747, "learning_rate": 7.766736799521e-06, "loss": 0.1051, "step": 3670 }, { "epoch": 1.0, "grad_norm": 1.9341991943882766, "learning_rate": 7.765509176091894e-06, "loss": 0.1026, "step": 3671 }, { "epoch": 1.0, "grad_norm": 2.357264880011443, "learning_rate": 7.764281312424668e-06, "loss": 0.162, "step": 3672 }, { "epoch": 1.0, "grad_norm": 1.9847222722541236, "learning_rate": 7.763053208625985e-06, "loss": 0.128, "step": 3673 }, { "epoch": 1.0, "grad_norm": 1.895491652290345, "learning_rate": 7.76182486480253e-06, "loss": 0.1211, "step": 3674 }, { "epoch": 1.0, "grad_norm": 2.0884058412451094, "learning_rate": 7.760596281061008e-06, "loss": 0.1084, "step": 3675 }, { "epoch": 1.0, "grad_norm": 2.5606195645329892, "learning_rate": 7.759367457508145e-06, "loss": 0.152, "step": 3676 }, { "epoch": 1.0, "grad_norm": 2.2232985036917103, "learning_rate": 7.75813839425069e-06, "loss": 0.1297, "step": 3677 }, { "epoch": 1.0, "grad_norm": 2.1246733133536146, "learning_rate": 7.756909091395409e-06, "loss": 0.1489, "step": 3678 }, { "epoch": 1.0, "grad_norm": 2.3298561093334205, "learning_rate": 7.755679549049093e-06, "loss": 0.1301, "step": 3679 }, { "epoch": 1.0, "grad_norm": 1.787679274015443, "learning_rate": 7.754449767318548e-06, "loss": 0.1325, "step": 3680 }, { "epoch": 1.0, "grad_norm": 2.2458370436488218, "learning_rate": 7.753219746310607e-06, "loss": 0.1342, "step": 3681 }, { "epoch": 1.01, "grad_norm": 2.0569591421773312, "learning_rate": 7.751989486132122e-06, "loss": 0.1255, "step": 3682 }, { "epoch": 1.01, "grad_norm": 1.9517107500015811, "learning_rate": 7.750758986889963e-06, "loss": 0.1225, "step": 3683 }, { "epoch": 1.01, "grad_norm": 1.7270387776582103, "learning_rate": 7.749528248691026e-06, "loss": 0.1009, "step": 3684 }, { "epoch": 1.01, "grad_norm": 1.9922838751071787, "learning_rate": 7.748297271642218e-06, "loss": 0.1153, "step": 3685 }, { "epoch": 1.01, "grad_norm": 2.022761341914585, "learning_rate": 7.747066055850479e-06, "loss": 0.1188, "step": 3686 }, { "epoch": 1.01, "grad_norm": 2.002268246115833, "learning_rate": 7.745834601422762e-06, "loss": 0.1089, "step": 3687 }, { "epoch": 1.01, "grad_norm": 2.0257435845184664, "learning_rate": 7.744602908466044e-06, "loss": 0.1335, "step": 3688 }, { "epoch": 1.01, "grad_norm": 1.906314083171679, "learning_rate": 7.743370977087318e-06, "loss": 0.1185, "step": 3689 }, { "epoch": 1.01, "grad_norm": 1.9820137401200066, "learning_rate": 7.742138807393607e-06, "loss": 0.1203, "step": 3690 }, { "epoch": 1.01, "grad_norm": 1.7000351401395506, "learning_rate": 7.740906399491941e-06, "loss": 0.1156, "step": 3691 }, { "epoch": 1.01, "grad_norm": 1.853642092895378, "learning_rate": 7.739673753489386e-06, "loss": 0.1048, "step": 3692 }, { "epoch": 1.01, "grad_norm": 2.8665274999096684, "learning_rate": 7.738440869493018e-06, "loss": 0.1399, "step": 3693 }, { "epoch": 1.01, "grad_norm": 1.8854289206162174, "learning_rate": 7.737207747609936e-06, "loss": 0.1197, "step": 3694 }, { "epoch": 1.01, "grad_norm": 1.982596994289207, "learning_rate": 7.73597438794726e-06, "loss": 0.1263, "step": 3695 }, { "epoch": 1.01, "grad_norm": 2.2178625175209574, "learning_rate": 7.734740790612137e-06, "loss": 0.1424, "step": 3696 }, { "epoch": 1.01, "grad_norm": 2.3352004148220162, "learning_rate": 7.73350695571172e-06, "loss": 0.1402, "step": 3697 }, { "epoch": 1.01, "grad_norm": 1.8392551497793572, "learning_rate": 7.732272883353197e-06, "loss": 0.1006, "step": 3698 }, { "epoch": 1.01, "grad_norm": 2.1135223526792193, "learning_rate": 7.731038573643772e-06, "loss": 0.1352, "step": 3699 }, { "epoch": 1.01, "grad_norm": 2.4588044373414597, "learning_rate": 7.729804026690667e-06, "loss": 0.1231, "step": 3700 }, { "epoch": 1.01, "grad_norm": 2.1401973444446876, "learning_rate": 7.728569242601125e-06, "loss": 0.1308, "step": 3701 }, { "epoch": 1.01, "grad_norm": 1.933891212732119, "learning_rate": 7.727334221482412e-06, "loss": 0.0984, "step": 3702 }, { "epoch": 1.01, "grad_norm": 2.1801745492849016, "learning_rate": 7.726098963441815e-06, "loss": 0.1443, "step": 3703 }, { "epoch": 1.01, "grad_norm": 2.1297045046097427, "learning_rate": 7.72486346858664e-06, "loss": 0.114, "step": 3704 }, { "epoch": 1.01, "grad_norm": 2.057459886527118, "learning_rate": 7.72362773702421e-06, "loss": 0.1184, "step": 3705 }, { "epoch": 1.01, "grad_norm": 2.1681983924518784, "learning_rate": 7.722391768861875e-06, "loss": 0.1256, "step": 3706 }, { "epoch": 1.01, "grad_norm": 1.8668252852955303, "learning_rate": 7.721155564207003e-06, "loss": 0.1188, "step": 3707 }, { "epoch": 1.01, "grad_norm": 2.2069628875416116, "learning_rate": 7.719919123166984e-06, "loss": 0.1349, "step": 3708 }, { "epoch": 1.01, "grad_norm": 2.0008419645704647, "learning_rate": 7.718682445849224e-06, "loss": 0.1316, "step": 3709 }, { "epoch": 1.01, "grad_norm": 2.2983687304425855, "learning_rate": 7.717445532361152e-06, "loss": 0.1345, "step": 3710 }, { "epoch": 1.01, "grad_norm": 2.05051866052114, "learning_rate": 7.716208382810221e-06, "loss": 0.1058, "step": 3711 }, { "epoch": 1.01, "grad_norm": 2.0233261525054567, "learning_rate": 7.714970997303898e-06, "loss": 0.1222, "step": 3712 }, { "epoch": 1.01, "grad_norm": 1.9752764858048422, "learning_rate": 7.713733375949677e-06, "loss": 0.1201, "step": 3713 }, { "epoch": 1.01, "grad_norm": 1.873273117532547, "learning_rate": 7.712495518855067e-06, "loss": 0.1262, "step": 3714 }, { "epoch": 1.01, "grad_norm": 1.7002184049434432, "learning_rate": 7.711257426127601e-06, "loss": 0.1121, "step": 3715 }, { "epoch": 1.01, "grad_norm": 1.9103501157364418, "learning_rate": 7.710019097874833e-06, "loss": 0.1208, "step": 3716 }, { "epoch": 1.01, "grad_norm": 1.9079401513506784, "learning_rate": 7.708780534204332e-06, "loss": 0.1129, "step": 3717 }, { "epoch": 1.02, "grad_norm": 2.004981317480421, "learning_rate": 7.707541735223696e-06, "loss": 0.1248, "step": 3718 }, { "epoch": 1.02, "grad_norm": 1.9873701586717412, "learning_rate": 7.706302701040534e-06, "loss": 0.1214, "step": 3719 }, { "epoch": 1.02, "grad_norm": 2.7659708433541366, "learning_rate": 7.705063431762482e-06, "loss": 0.1345, "step": 3720 }, { "epoch": 1.02, "grad_norm": 2.0630262358471736, "learning_rate": 7.703823927497196e-06, "loss": 0.1267, "step": 3721 }, { "epoch": 1.02, "grad_norm": 2.1202664639270123, "learning_rate": 7.702584188352351e-06, "loss": 0.1227, "step": 3722 }, { "epoch": 1.02, "grad_norm": 1.9194476615333196, "learning_rate": 7.701344214435639e-06, "loss": 0.1093, "step": 3723 }, { "epoch": 1.02, "grad_norm": 2.1041895925649134, "learning_rate": 7.70010400585478e-06, "loss": 0.1311, "step": 3724 }, { "epoch": 1.02, "grad_norm": 1.791171872663612, "learning_rate": 7.69886356271751e-06, "loss": 0.1044, "step": 3725 }, { "epoch": 1.02, "grad_norm": 2.3081099524846302, "learning_rate": 7.697622885131579e-06, "loss": 0.1394, "step": 3726 }, { "epoch": 1.02, "grad_norm": 2.1810024380765274, "learning_rate": 7.696381973204772e-06, "loss": 0.1309, "step": 3727 }, { "epoch": 1.02, "grad_norm": 2.1530724119101965, "learning_rate": 7.695140827044882e-06, "loss": 0.1143, "step": 3728 }, { "epoch": 1.02, "grad_norm": 2.027416485250483, "learning_rate": 7.693899446759727e-06, "loss": 0.1189, "step": 3729 }, { "epoch": 1.02, "grad_norm": 2.103253731855292, "learning_rate": 7.692657832457146e-06, "loss": 0.1221, "step": 3730 }, { "epoch": 1.02, "grad_norm": 2.4178987848133366, "learning_rate": 7.691415984244998e-06, "loss": 0.1243, "step": 3731 }, { "epoch": 1.02, "grad_norm": 1.9307468950288047, "learning_rate": 7.69017390223116e-06, "loss": 0.1167, "step": 3732 }, { "epoch": 1.02, "grad_norm": 2.2186650535101213, "learning_rate": 7.688931586523531e-06, "loss": 0.1327, "step": 3733 }, { "epoch": 1.02, "grad_norm": 1.9361333348080088, "learning_rate": 7.687689037230031e-06, "loss": 0.117, "step": 3734 }, { "epoch": 1.02, "grad_norm": 1.8704648712928205, "learning_rate": 7.686446254458598e-06, "loss": 0.1118, "step": 3735 }, { "epoch": 1.02, "grad_norm": 2.0021402115150564, "learning_rate": 7.685203238317194e-06, "loss": 0.1282, "step": 3736 }, { "epoch": 1.02, "grad_norm": 1.8389938289627499, "learning_rate": 7.683959988913798e-06, "loss": 0.1172, "step": 3737 }, { "epoch": 1.02, "grad_norm": 2.2211323360832544, "learning_rate": 7.68271650635641e-06, "loss": 0.1363, "step": 3738 }, { "epoch": 1.02, "grad_norm": 2.38660748421882, "learning_rate": 7.68147279075305e-06, "loss": 0.1215, "step": 3739 }, { "epoch": 1.02, "grad_norm": 2.043336109098562, "learning_rate": 7.680228842211762e-06, "loss": 0.1292, "step": 3740 }, { "epoch": 1.02, "grad_norm": 2.077726491515614, "learning_rate": 7.678984660840603e-06, "loss": 0.1461, "step": 3741 }, { "epoch": 1.02, "grad_norm": 1.9413258346644382, "learning_rate": 7.677740246747657e-06, "loss": 0.1257, "step": 3742 }, { "epoch": 1.02, "grad_norm": 2.3790580947663225, "learning_rate": 7.676495600041025e-06, "loss": 0.117, "step": 3743 }, { "epoch": 1.02, "grad_norm": 1.931216787036907, "learning_rate": 7.675250720828827e-06, "loss": 0.1127, "step": 3744 }, { "epoch": 1.02, "grad_norm": 2.2817151131499283, "learning_rate": 7.674005609219208e-06, "loss": 0.1673, "step": 3745 }, { "epoch": 1.02, "grad_norm": 3.322409997825016, "learning_rate": 7.672760265320326e-06, "loss": 0.1399, "step": 3746 }, { "epoch": 1.02, "grad_norm": 2.0328968578081628, "learning_rate": 7.671514689240366e-06, "loss": 0.1073, "step": 3747 }, { "epoch": 1.02, "grad_norm": 2.190794783708201, "learning_rate": 7.670268881087532e-06, "loss": 0.1163, "step": 3748 }, { "epoch": 1.02, "grad_norm": 1.8360948468549523, "learning_rate": 7.669022840970042e-06, "loss": 0.0977, "step": 3749 }, { "epoch": 1.02, "grad_norm": 2.159420469519051, "learning_rate": 7.667776568996143e-06, "loss": 0.1101, "step": 3750 }, { "epoch": 1.02, "grad_norm": 1.9715156209059748, "learning_rate": 7.666530065274096e-06, "loss": 0.1094, "step": 3751 }, { "epoch": 1.02, "grad_norm": 2.0006232383871634, "learning_rate": 7.665283329912183e-06, "loss": 0.1159, "step": 3752 }, { "epoch": 1.02, "grad_norm": 1.891669338202314, "learning_rate": 7.664036363018709e-06, "loss": 0.0982, "step": 3753 }, { "epoch": 1.02, "grad_norm": 1.932178421119616, "learning_rate": 7.662789164702e-06, "loss": 0.1091, "step": 3754 }, { "epoch": 1.03, "grad_norm": 1.9550871481428564, "learning_rate": 7.661541735070392e-06, "loss": 0.1096, "step": 3755 }, { "epoch": 1.03, "grad_norm": 2.309750163994795, "learning_rate": 7.660294074232254e-06, "loss": 0.1355, "step": 3756 }, { "epoch": 1.03, "grad_norm": 1.9411718065366854, "learning_rate": 7.659046182295968e-06, "loss": 0.1159, "step": 3757 }, { "epoch": 1.03, "grad_norm": 2.0663007125384802, "learning_rate": 7.657798059369938e-06, "loss": 0.1033, "step": 3758 }, { "epoch": 1.03, "grad_norm": 1.699637014114936, "learning_rate": 7.656549705562588e-06, "loss": 0.104, "step": 3759 }, { "epoch": 1.03, "grad_norm": 2.0754942082614556, "learning_rate": 7.655301120982362e-06, "loss": 0.1247, "step": 3760 }, { "epoch": 1.03, "grad_norm": 1.884264305624918, "learning_rate": 7.65405230573772e-06, "loss": 0.1182, "step": 3761 }, { "epoch": 1.03, "grad_norm": 1.9442666225404777, "learning_rate": 7.65280325993715e-06, "loss": 0.1159, "step": 3762 }, { "epoch": 1.03, "grad_norm": 2.259687442842502, "learning_rate": 7.651553983689155e-06, "loss": 0.1399, "step": 3763 }, { "epoch": 1.03, "grad_norm": 1.9731502958964626, "learning_rate": 7.650304477102258e-06, "loss": 0.1169, "step": 3764 }, { "epoch": 1.03, "grad_norm": 2.8490187668665032, "learning_rate": 7.649054740285005e-06, "loss": 0.1338, "step": 3765 }, { "epoch": 1.03, "grad_norm": 2.093527942133517, "learning_rate": 7.647804773345957e-06, "loss": 0.1154, "step": 3766 }, { "epoch": 1.03, "grad_norm": 1.9360838865901533, "learning_rate": 7.6465545763937e-06, "loss": 0.1463, "step": 3767 }, { "epoch": 1.03, "grad_norm": 1.7420134041239128, "learning_rate": 7.645304149536833e-06, "loss": 0.1174, "step": 3768 }, { "epoch": 1.03, "grad_norm": 2.124621494746595, "learning_rate": 7.64405349288399e-06, "loss": 0.1416, "step": 3769 }, { "epoch": 1.03, "grad_norm": 2.032178427084541, "learning_rate": 7.642802606543805e-06, "loss": 0.1246, "step": 3770 }, { "epoch": 1.03, "grad_norm": 2.0429903039752175, "learning_rate": 7.641551490624945e-06, "loss": 0.1211, "step": 3771 }, { "epoch": 1.03, "grad_norm": 2.4544332284707857, "learning_rate": 7.640300145236096e-06, "loss": 0.1405, "step": 3772 }, { "epoch": 1.03, "grad_norm": 1.9425654583940424, "learning_rate": 7.63904857048596e-06, "loss": 0.1105, "step": 3773 }, { "epoch": 1.03, "grad_norm": 1.9763481900564752, "learning_rate": 7.637796766483259e-06, "loss": 0.1108, "step": 3774 }, { "epoch": 1.03, "grad_norm": 2.213957311424813, "learning_rate": 7.636544733336739e-06, "loss": 0.1459, "step": 3775 }, { "epoch": 1.03, "grad_norm": 1.9079870239827432, "learning_rate": 7.63529247115516e-06, "loss": 0.1262, "step": 3776 }, { "epoch": 1.03, "grad_norm": 1.9417706053933135, "learning_rate": 7.634039980047308e-06, "loss": 0.1249, "step": 3777 }, { "epoch": 1.03, "grad_norm": 1.8839413417862498, "learning_rate": 7.632787260121987e-06, "loss": 0.1223, "step": 3778 }, { "epoch": 1.03, "grad_norm": 1.730359986752707, "learning_rate": 7.631534311488016e-06, "loss": 0.1031, "step": 3779 }, { "epoch": 1.03, "grad_norm": 2.091028049090303, "learning_rate": 7.630281134254243e-06, "loss": 0.1119, "step": 3780 }, { "epoch": 1.03, "grad_norm": 1.9783565560528857, "learning_rate": 7.629027728529527e-06, "loss": 0.1146, "step": 3781 }, { "epoch": 1.03, "grad_norm": 2.207020810025933, "learning_rate": 7.627774094422751e-06, "loss": 0.1354, "step": 3782 }, { "epoch": 1.03, "grad_norm": 2.0780893017509325, "learning_rate": 7.626520232042819e-06, "loss": 0.1281, "step": 3783 }, { "epoch": 1.03, "grad_norm": 2.326454344067117, "learning_rate": 7.625266141498653e-06, "loss": 0.1508, "step": 3784 }, { "epoch": 1.03, "grad_norm": 2.2877337956174024, "learning_rate": 7.624011822899193e-06, "loss": 0.1338, "step": 3785 }, { "epoch": 1.03, "grad_norm": 2.0433247797705896, "learning_rate": 7.622757276353404e-06, "loss": 0.1318, "step": 3786 }, { "epoch": 1.03, "grad_norm": 2.477176615181054, "learning_rate": 7.621502501970266e-06, "loss": 0.1198, "step": 3787 }, { "epoch": 1.03, "grad_norm": 1.8899613382321498, "learning_rate": 7.62024749985878e-06, "loss": 0.1166, "step": 3788 }, { "epoch": 1.03, "grad_norm": 2.2823334619309605, "learning_rate": 7.618992270127968e-06, "loss": 0.1533, "step": 3789 }, { "epoch": 1.03, "grad_norm": 2.187582203800892, "learning_rate": 7.617736812886873e-06, "loss": 0.143, "step": 3790 }, { "epoch": 1.03, "grad_norm": 2.060816786485956, "learning_rate": 7.616481128244552e-06, "loss": 0.1247, "step": 3791 }, { "epoch": 1.04, "grad_norm": 2.1473759311973075, "learning_rate": 7.615225216310087e-06, "loss": 0.1215, "step": 3792 }, { "epoch": 1.04, "grad_norm": 2.027390599332439, "learning_rate": 7.61396907719258e-06, "loss": 0.1036, "step": 3793 }, { "epoch": 1.04, "grad_norm": 2.2999710157707582, "learning_rate": 7.612712711001149e-06, "loss": 0.1579, "step": 3794 }, { "epoch": 1.04, "grad_norm": 1.9420989573866498, "learning_rate": 7.611456117844934e-06, "loss": 0.1109, "step": 3795 }, { "epoch": 1.04, "grad_norm": 1.9202408310627497, "learning_rate": 7.610199297833097e-06, "loss": 0.1099, "step": 3796 }, { "epoch": 1.04, "grad_norm": 3.4931245594801488, "learning_rate": 7.6089422510748135e-06, "loss": 0.1296, "step": 3797 }, { "epoch": 1.04, "grad_norm": 2.487569412261905, "learning_rate": 7.607684977679284e-06, "loss": 0.133, "step": 3798 }, { "epoch": 1.04, "grad_norm": 2.2629390578533757, "learning_rate": 7.606427477755729e-06, "loss": 0.1063, "step": 3799 }, { "epoch": 1.04, "grad_norm": 1.815007814451184, "learning_rate": 7.605169751413382e-06, "loss": 0.1061, "step": 3800 }, { "epoch": 1.04, "grad_norm": 1.78255486807673, "learning_rate": 7.603911798761506e-06, "loss": 0.0984, "step": 3801 }, { "epoch": 1.04, "grad_norm": 1.9411296500169395, "learning_rate": 7.602653619909377e-06, "loss": 0.1046, "step": 3802 }, { "epoch": 1.04, "grad_norm": 1.9040436529835725, "learning_rate": 7.6013952149662905e-06, "loss": 0.1162, "step": 3803 }, { "epoch": 1.04, "grad_norm": 2.2592443603321635, "learning_rate": 7.600136584041564e-06, "loss": 0.1322, "step": 3804 }, { "epoch": 1.04, "grad_norm": 2.18545426159128, "learning_rate": 7.598877727244538e-06, "loss": 0.1287, "step": 3805 }, { "epoch": 1.04, "grad_norm": 2.5845484981997244, "learning_rate": 7.597618644684561e-06, "loss": 0.1423, "step": 3806 }, { "epoch": 1.04, "grad_norm": 7.122224330553804, "learning_rate": 7.596359336471015e-06, "loss": 0.1461, "step": 3807 }, { "epoch": 1.04, "grad_norm": 2.400711901823705, "learning_rate": 7.595099802713293e-06, "loss": 0.1438, "step": 3808 }, { "epoch": 1.04, "grad_norm": 2.1607933436257425, "learning_rate": 7.593840043520811e-06, "loss": 0.141, "step": 3809 }, { "epoch": 1.04, "grad_norm": 2.5193339257391743, "learning_rate": 7.592580059003002e-06, "loss": 0.1677, "step": 3810 }, { "epoch": 1.04, "grad_norm": 2.1070766809372454, "learning_rate": 7.591319849269322e-06, "loss": 0.1285, "step": 3811 }, { "epoch": 1.04, "grad_norm": 1.7893219394998408, "learning_rate": 7.590059414429243e-06, "loss": 0.1095, "step": 3812 }, { "epoch": 1.04, "grad_norm": 1.9859167953025019, "learning_rate": 7.588798754592258e-06, "loss": 0.1109, "step": 3813 }, { "epoch": 1.04, "grad_norm": 1.9754442486155872, "learning_rate": 7.5875378698678825e-06, "loss": 0.1204, "step": 3814 }, { "epoch": 1.04, "grad_norm": 2.1919939021031456, "learning_rate": 7.586276760365645e-06, "loss": 0.1571, "step": 3815 }, { "epoch": 1.04, "grad_norm": 1.8636472283709595, "learning_rate": 7.585015426195099e-06, "loss": 0.1072, "step": 3816 }, { "epoch": 1.04, "grad_norm": 1.9258476176040322, "learning_rate": 7.583753867465819e-06, "loss": 0.116, "step": 3817 }, { "epoch": 1.04, "grad_norm": 1.8914851751763961, "learning_rate": 7.582492084287389e-06, "loss": 0.1092, "step": 3818 }, { "epoch": 1.04, "grad_norm": 2.2381351920084693, "learning_rate": 7.581230076769426e-06, "loss": 0.1539, "step": 3819 }, { "epoch": 1.04, "grad_norm": 2.0667574388644927, "learning_rate": 7.5799678450215566e-06, "loss": 0.124, "step": 3820 }, { "epoch": 1.04, "grad_norm": 2.2180434666246245, "learning_rate": 7.57870538915343e-06, "loss": 0.1375, "step": 3821 }, { "epoch": 1.04, "grad_norm": 2.0791209671074786, "learning_rate": 7.577442709274716e-06, "loss": 0.1174, "step": 3822 }, { "epoch": 1.04, "grad_norm": 1.8182884625637064, "learning_rate": 7.576179805495102e-06, "loss": 0.0924, "step": 3823 }, { "epoch": 1.04, "grad_norm": 2.0908213312737605, "learning_rate": 7.574916677924295e-06, "loss": 0.1067, "step": 3824 }, { "epoch": 1.04, "grad_norm": 2.0439814288268674, "learning_rate": 7.573653326672026e-06, "loss": 0.1147, "step": 3825 }, { "epoch": 1.04, "grad_norm": 2.234533068126777, "learning_rate": 7.572389751848037e-06, "loss": 0.1191, "step": 3826 }, { "epoch": 1.04, "grad_norm": 1.9979746648423296, "learning_rate": 7.571125953562095e-06, "loss": 0.1113, "step": 3827 }, { "epoch": 1.05, "grad_norm": 2.0435986277618454, "learning_rate": 7.569861931923989e-06, "loss": 0.1165, "step": 3828 }, { "epoch": 1.05, "grad_norm": 1.9489731784843585, "learning_rate": 7.5685976870435185e-06, "loss": 0.1088, "step": 3829 }, { "epoch": 1.05, "grad_norm": 2.0278297352860477, "learning_rate": 7.567333219030511e-06, "loss": 0.1234, "step": 3830 }, { "epoch": 1.05, "grad_norm": 2.1953538524287923, "learning_rate": 7.566068527994809e-06, "loss": 0.1457, "step": 3831 }, { "epoch": 1.05, "grad_norm": 1.7399577961527786, "learning_rate": 7.564803614046276e-06, "loss": 0.1027, "step": 3832 }, { "epoch": 1.05, "grad_norm": 2.100384229954873, "learning_rate": 7.563538477294793e-06, "loss": 0.1416, "step": 3833 }, { "epoch": 1.05, "grad_norm": 2.3432059152091114, "learning_rate": 7.562273117850264e-06, "loss": 0.1296, "step": 3834 }, { "epoch": 1.05, "grad_norm": 2.232665061085612, "learning_rate": 7.561007535822608e-06, "loss": 0.1246, "step": 3835 }, { "epoch": 1.05, "grad_norm": 2.03871537857279, "learning_rate": 7.5597417313217655e-06, "loss": 0.132, "step": 3836 }, { "epoch": 1.05, "grad_norm": 2.1104292454682887, "learning_rate": 7.558475704457698e-06, "loss": 0.115, "step": 3837 }, { "epoch": 1.05, "grad_norm": 2.134435422165674, "learning_rate": 7.557209455340382e-06, "loss": 0.1225, "step": 3838 }, { "epoch": 1.05, "grad_norm": 2.623820967149173, "learning_rate": 7.5559429840798185e-06, "loss": 0.1598, "step": 3839 }, { "epoch": 1.05, "grad_norm": 2.1751083310927912, "learning_rate": 7.554676290786023e-06, "loss": 0.1149, "step": 3840 }, { "epoch": 1.05, "grad_norm": 2.0688845722642686, "learning_rate": 7.553409375569032e-06, "loss": 0.1351, "step": 3841 }, { "epoch": 1.05, "grad_norm": 2.4061758756107574, "learning_rate": 7.552142238538905e-06, "loss": 0.1438, "step": 3842 }, { "epoch": 1.05, "grad_norm": 1.959849350279419, "learning_rate": 7.550874879805713e-06, "loss": 0.1201, "step": 3843 }, { "epoch": 1.05, "grad_norm": 2.195586633950834, "learning_rate": 7.549607299479554e-06, "loss": 0.1339, "step": 3844 }, { "epoch": 1.05, "grad_norm": 2.275838343695774, "learning_rate": 7.548339497670538e-06, "loss": 0.1185, "step": 3845 }, { "epoch": 1.05, "grad_norm": 2.0138949984997723, "learning_rate": 7.547071474488804e-06, "loss": 0.1185, "step": 3846 }, { "epoch": 1.05, "grad_norm": 2.440760860070949, "learning_rate": 7.5458032300445e-06, "loss": 0.1235, "step": 3847 }, { "epoch": 1.05, "grad_norm": 2.148225878770096, "learning_rate": 7.5445347644478e-06, "loss": 0.1159, "step": 3848 }, { "epoch": 1.05, "grad_norm": 2.0426812647668724, "learning_rate": 7.543266077808893e-06, "loss": 0.1301, "step": 3849 }, { "epoch": 1.05, "grad_norm": 4.040079652236069, "learning_rate": 7.541997170237989e-06, "loss": 0.1216, "step": 3850 }, { "epoch": 1.05, "grad_norm": 1.8651198756252683, "learning_rate": 7.540728041845319e-06, "loss": 0.1149, "step": 3851 }, { "epoch": 1.05, "grad_norm": 1.9001572836042329, "learning_rate": 7.539458692741131e-06, "loss": 0.1296, "step": 3852 }, { "epoch": 1.05, "grad_norm": 2.0255828982058763, "learning_rate": 7.538189123035691e-06, "loss": 0.1347, "step": 3853 }, { "epoch": 1.05, "grad_norm": 2.379063105768789, "learning_rate": 7.536919332839288e-06, "loss": 0.1487, "step": 3854 }, { "epoch": 1.05, "grad_norm": 2.5585705811249126, "learning_rate": 7.5356493222622265e-06, "loss": 0.153, "step": 3855 }, { "epoch": 1.05, "grad_norm": 2.3292888958864677, "learning_rate": 7.534379091414832e-06, "loss": 0.1378, "step": 3856 }, { "epoch": 1.05, "grad_norm": 1.9274956079730192, "learning_rate": 7.533108640407447e-06, "loss": 0.1258, "step": 3857 }, { "epoch": 1.05, "grad_norm": 2.042712402018402, "learning_rate": 7.5318379693504375e-06, "loss": 0.1077, "step": 3858 }, { "epoch": 1.05, "grad_norm": 2.3887910584551473, "learning_rate": 7.530567078354185e-06, "loss": 0.158, "step": 3859 }, { "epoch": 1.05, "grad_norm": 2.459217105232712, "learning_rate": 7.52929596752909e-06, "loss": 0.1375, "step": 3860 }, { "epoch": 1.05, "grad_norm": 2.4524818431952595, "learning_rate": 7.528024636985575e-06, "loss": 0.1578, "step": 3861 }, { "epoch": 1.05, "grad_norm": 2.217551888440984, "learning_rate": 7.5267530868340775e-06, "loss": 0.1331, "step": 3862 }, { "epoch": 1.05, "grad_norm": 2.217046594652403, "learning_rate": 7.525481317185057e-06, "loss": 0.1461, "step": 3863 }, { "epoch": 1.05, "grad_norm": 3.1391646749176862, "learning_rate": 7.524209328148995e-06, "loss": 0.1448, "step": 3864 }, { "epoch": 1.06, "grad_norm": 1.9448940713996767, "learning_rate": 7.5229371198363824e-06, "loss": 0.1051, "step": 3865 }, { "epoch": 1.06, "grad_norm": 2.1864608556086482, "learning_rate": 7.521664692357737e-06, "loss": 0.128, "step": 3866 }, { "epoch": 1.06, "grad_norm": 2.22166140657511, "learning_rate": 7.520392045823598e-06, "loss": 0.1465, "step": 3867 }, { "epoch": 1.06, "grad_norm": 1.9892002574237704, "learning_rate": 7.519119180344514e-06, "loss": 0.1391, "step": 3868 }, { "epoch": 1.06, "grad_norm": 1.9478256230445294, "learning_rate": 7.5178460960310605e-06, "loss": 0.1148, "step": 3869 }, { "epoch": 1.06, "grad_norm": 2.1832304119031076, "learning_rate": 7.51657279299383e-06, "loss": 0.1401, "step": 3870 }, { "epoch": 1.06, "grad_norm": 1.6455865180261677, "learning_rate": 7.515299271343434e-06, "loss": 0.0897, "step": 3871 }, { "epoch": 1.06, "grad_norm": 2.3708326081904145, "learning_rate": 7.514025531190499e-06, "loss": 0.1542, "step": 3872 }, { "epoch": 1.06, "grad_norm": 1.8935678478048354, "learning_rate": 7.512751572645679e-06, "loss": 0.1034, "step": 3873 }, { "epoch": 1.06, "grad_norm": 2.1230092424681124, "learning_rate": 7.5114773958196385e-06, "loss": 0.1224, "step": 3874 }, { "epoch": 1.06, "grad_norm": 2.027034265766682, "learning_rate": 7.510203000823066e-06, "loss": 0.1114, "step": 3875 }, { "epoch": 1.06, "grad_norm": 2.222676575068051, "learning_rate": 7.5089283877666664e-06, "loss": 0.1477, "step": 3876 }, { "epoch": 1.06, "grad_norm": 2.1273060136462276, "learning_rate": 7.507653556761166e-06, "loss": 0.1172, "step": 3877 }, { "epoch": 1.06, "grad_norm": 2.087109178883188, "learning_rate": 7.506378507917306e-06, "loss": 0.1227, "step": 3878 }, { "epoch": 1.06, "grad_norm": 2.143495203803203, "learning_rate": 7.505103241345853e-06, "loss": 0.1225, "step": 3879 }, { "epoch": 1.06, "grad_norm": 2.251255473238864, "learning_rate": 7.503827757157584e-06, "loss": 0.1318, "step": 3880 }, { "epoch": 1.06, "grad_norm": 2.165190009236253, "learning_rate": 7.5025520554633035e-06, "loss": 0.1035, "step": 3881 }, { "epoch": 1.06, "grad_norm": 2.176057028029448, "learning_rate": 7.501276136373831e-06, "loss": 0.1391, "step": 3882 }, { "epoch": 1.06, "grad_norm": 2.0142652621974895, "learning_rate": 7.500000000000001e-06, "loss": 0.1153, "step": 3883 }, { "epoch": 1.06, "grad_norm": 2.0047299306248467, "learning_rate": 7.498723646452673e-06, "loss": 0.1226, "step": 3884 }, { "epoch": 1.06, "grad_norm": 2.1608921058753405, "learning_rate": 7.497447075842725e-06, "loss": 0.1073, "step": 3885 }, { "epoch": 1.06, "grad_norm": 2.2977880579664305, "learning_rate": 7.496170288281049e-06, "loss": 0.1578, "step": 3886 }, { "epoch": 1.06, "grad_norm": 2.133512346587424, "learning_rate": 7.494893283878559e-06, "loss": 0.1007, "step": 3887 }, { "epoch": 1.06, "grad_norm": 2.1753885488805498, "learning_rate": 7.493616062746191e-06, "loss": 0.1184, "step": 3888 }, { "epoch": 1.06, "grad_norm": 2.1006548687026956, "learning_rate": 7.492338624994892e-06, "loss": 0.1376, "step": 3889 }, { "epoch": 1.06, "grad_norm": 1.8971780208166353, "learning_rate": 7.491060970735633e-06, "loss": 0.1123, "step": 3890 }, { "epoch": 1.06, "grad_norm": 2.125157506528644, "learning_rate": 7.489783100079407e-06, "loss": 0.1423, "step": 3891 }, { "epoch": 1.06, "grad_norm": 1.9354704301464662, "learning_rate": 7.488505013137215e-06, "loss": 0.1273, "step": 3892 }, { "epoch": 1.06, "grad_norm": 2.0189909509120425, "learning_rate": 7.4872267100200905e-06, "loss": 0.1281, "step": 3893 }, { "epoch": 1.06, "grad_norm": 1.9884918843137125, "learning_rate": 7.485948190839076e-06, "loss": 0.1123, "step": 3894 }, { "epoch": 1.06, "grad_norm": 1.9607487448291196, "learning_rate": 7.484669455705235e-06, "loss": 0.1321, "step": 3895 }, { "epoch": 1.06, "grad_norm": 1.8519986073858707, "learning_rate": 7.483390504729651e-06, "loss": 0.1116, "step": 3896 }, { "epoch": 1.06, "grad_norm": 2.1195844912532094, "learning_rate": 7.4821113380234266e-06, "loss": 0.1325, "step": 3897 }, { "epoch": 1.06, "grad_norm": 2.329881985749329, "learning_rate": 7.48083195569768e-06, "loss": 0.1309, "step": 3898 }, { "epoch": 1.06, "grad_norm": 2.127672858599593, "learning_rate": 7.479552357863553e-06, "loss": 0.1207, "step": 3899 }, { "epoch": 1.06, "grad_norm": 1.8440888140963385, "learning_rate": 7.478272544632204e-06, "loss": 0.1173, "step": 3900 }, { "epoch": 1.06, "grad_norm": 2.094780808136034, "learning_rate": 7.476992516114805e-06, "loss": 0.1201, "step": 3901 }, { "epoch": 1.07, "grad_norm": 2.3466968102559167, "learning_rate": 7.4757122724225575e-06, "loss": 0.1301, "step": 3902 }, { "epoch": 1.07, "grad_norm": 2.038753418217479, "learning_rate": 7.474431813666669e-06, "loss": 0.1226, "step": 3903 }, { "epoch": 1.07, "grad_norm": 2.197397302185069, "learning_rate": 7.473151139958378e-06, "loss": 0.1126, "step": 3904 }, { "epoch": 1.07, "grad_norm": 2.0627679163946873, "learning_rate": 7.4718702514089324e-06, "loss": 0.1351, "step": 3905 }, { "epoch": 1.07, "grad_norm": 2.0491556330215053, "learning_rate": 7.470589148129603e-06, "loss": 0.1371, "step": 3906 }, { "epoch": 1.07, "grad_norm": 2.0169410858543992, "learning_rate": 7.469307830231679e-06, "loss": 0.1333, "step": 3907 }, { "epoch": 1.07, "grad_norm": 2.185654585940526, "learning_rate": 7.468026297826468e-06, "loss": 0.1582, "step": 3908 }, { "epoch": 1.07, "grad_norm": 2.356927378141032, "learning_rate": 7.4667445510252945e-06, "loss": 0.1439, "step": 3909 }, { "epoch": 1.07, "grad_norm": 14.742003771019665, "learning_rate": 7.465462589939504e-06, "loss": 0.1368, "step": 3910 }, { "epoch": 1.07, "grad_norm": 1.7950421170417288, "learning_rate": 7.4641804146804605e-06, "loss": 0.1153, "step": 3911 }, { "epoch": 1.07, "grad_norm": 2.128224600591649, "learning_rate": 7.462898025359544e-06, "loss": 0.112, "step": 3912 }, { "epoch": 1.07, "grad_norm": 2.1396326082975095, "learning_rate": 7.461615422088155e-06, "loss": 0.1272, "step": 3913 }, { "epoch": 1.07, "grad_norm": 2.0025502724788504, "learning_rate": 7.460332604977716e-06, "loss": 0.1202, "step": 3914 }, { "epoch": 1.07, "grad_norm": 2.2624811154948556, "learning_rate": 7.4590495741396585e-06, "loss": 0.1349, "step": 3915 }, { "epoch": 1.07, "grad_norm": 1.9355326775586001, "learning_rate": 7.457766329685444e-06, "loss": 0.1259, "step": 3916 }, { "epoch": 1.07, "grad_norm": 2.0140273416791428, "learning_rate": 7.456482871726545e-06, "loss": 0.1255, "step": 3917 }, { "epoch": 1.07, "grad_norm": 1.9606837035385274, "learning_rate": 7.4551992003744545e-06, "loss": 0.1172, "step": 3918 }, { "epoch": 1.07, "grad_norm": 2.0180270701257226, "learning_rate": 7.4539153157406825e-06, "loss": 0.1306, "step": 3919 }, { "epoch": 1.07, "grad_norm": 1.856103025005708, "learning_rate": 7.4526312179367656e-06, "loss": 0.1179, "step": 3920 }, { "epoch": 1.07, "grad_norm": 2.1450723568870793, "learning_rate": 7.451346907074245e-06, "loss": 0.1231, "step": 3921 }, { "epoch": 1.07, "grad_norm": 2.2612429769892226, "learning_rate": 7.450062383264692e-06, "loss": 0.1434, "step": 3922 }, { "epoch": 1.07, "grad_norm": 2.518162038010842, "learning_rate": 7.448777646619693e-06, "loss": 0.1241, "step": 3923 }, { "epoch": 1.07, "grad_norm": 2.067242591568339, "learning_rate": 7.44749269725085e-06, "loss": 0.1378, "step": 3924 }, { "epoch": 1.07, "grad_norm": 2.0774798553528186, "learning_rate": 7.4462075352697875e-06, "loss": 0.1205, "step": 3925 }, { "epoch": 1.07, "grad_norm": 2.075024819502733, "learning_rate": 7.444922160788146e-06, "loss": 0.1303, "step": 3926 }, { "epoch": 1.07, "grad_norm": 1.926040180986208, "learning_rate": 7.443636573917585e-06, "loss": 0.113, "step": 3927 }, { "epoch": 1.07, "grad_norm": 2.384360386854931, "learning_rate": 7.442350774769782e-06, "loss": 0.1411, "step": 3928 }, { "epoch": 1.07, "grad_norm": 1.673943894788367, "learning_rate": 7.441064763456437e-06, "loss": 0.0857, "step": 3929 }, { "epoch": 1.07, "grad_norm": 2.521919680103763, "learning_rate": 7.439778540089261e-06, "loss": 0.1559, "step": 3930 }, { "epoch": 1.07, "grad_norm": 2.513935300832426, "learning_rate": 7.438492104779989e-06, "loss": 0.1274, "step": 3931 }, { "epoch": 1.07, "grad_norm": 2.176807780283664, "learning_rate": 7.437205457640374e-06, "loss": 0.1044, "step": 3932 }, { "epoch": 1.07, "grad_norm": 2.1076513224284428, "learning_rate": 7.435918598782183e-06, "loss": 0.1228, "step": 3933 }, { "epoch": 1.07, "grad_norm": 2.279043079419693, "learning_rate": 7.434631528317209e-06, "loss": 0.1522, "step": 3934 }, { "epoch": 1.07, "grad_norm": 2.204071430556285, "learning_rate": 7.433344246357257e-06, "loss": 0.13, "step": 3935 }, { "epoch": 1.07, "grad_norm": 2.2060846039932938, "learning_rate": 7.432056753014152e-06, "loss": 0.1438, "step": 3936 }, { "epoch": 1.07, "grad_norm": 2.1204035655348754, "learning_rate": 7.4307690483997365e-06, "loss": 0.1246, "step": 3937 }, { "epoch": 1.08, "grad_norm": 2.1186187320136267, "learning_rate": 7.429481132625876e-06, "loss": 0.1421, "step": 3938 }, { "epoch": 1.08, "grad_norm": 1.9005706004579819, "learning_rate": 7.428193005804449e-06, "loss": 0.1079, "step": 3939 }, { "epoch": 1.08, "grad_norm": 2.083741061405799, "learning_rate": 7.426904668047352e-06, "loss": 0.1178, "step": 3940 }, { "epoch": 1.08, "grad_norm": 2.063866709633579, "learning_rate": 7.425616119466508e-06, "loss": 0.1286, "step": 3941 }, { "epoch": 1.08, "grad_norm": 1.817758440126498, "learning_rate": 7.424327360173847e-06, "loss": 0.1023, "step": 3942 }, { "epoch": 1.08, "grad_norm": 2.013458294290249, "learning_rate": 7.4230383902813255e-06, "loss": 0.1321, "step": 3943 }, { "epoch": 1.08, "grad_norm": 2.2409224472735123, "learning_rate": 7.421749209900916e-06, "loss": 0.1172, "step": 3944 }, { "epoch": 1.08, "grad_norm": 2.3202713111420383, "learning_rate": 7.420459819144605e-06, "loss": 0.1297, "step": 3945 }, { "epoch": 1.08, "grad_norm": 1.9171512332914806, "learning_rate": 7.419170218124405e-06, "loss": 0.116, "step": 3946 }, { "epoch": 1.08, "grad_norm": 1.926502387883841, "learning_rate": 7.417880406952343e-06, "loss": 0.1188, "step": 3947 }, { "epoch": 1.08, "grad_norm": 2.1402951453430905, "learning_rate": 7.4165903857404606e-06, "loss": 0.1121, "step": 3948 }, { "epoch": 1.08, "grad_norm": 2.146439520986459, "learning_rate": 7.4153001546008245e-06, "loss": 0.14, "step": 3949 }, { "epoch": 1.08, "grad_norm": 1.9027406826044377, "learning_rate": 7.414009713645516e-06, "loss": 0.1156, "step": 3950 }, { "epoch": 1.08, "grad_norm": 2.3179571518912288, "learning_rate": 7.412719062986632e-06, "loss": 0.1386, "step": 3951 }, { "epoch": 1.08, "grad_norm": 2.1585023801670684, "learning_rate": 7.411428202736293e-06, "loss": 0.1269, "step": 3952 }, { "epoch": 1.08, "grad_norm": 1.7807928987543278, "learning_rate": 7.410137133006636e-06, "loss": 0.1023, "step": 3953 }, { "epoch": 1.08, "grad_norm": 2.2934532102517853, "learning_rate": 7.408845853909813e-06, "loss": 0.1208, "step": 3954 }, { "epoch": 1.08, "grad_norm": 2.2432775515713814, "learning_rate": 7.407554365557999e-06, "loss": 0.1272, "step": 3955 }, { "epoch": 1.08, "grad_norm": 1.69488860170964, "learning_rate": 7.406262668063383e-06, "loss": 0.1003, "step": 3956 }, { "epoch": 1.08, "grad_norm": 2.082637074657893, "learning_rate": 7.404970761538175e-06, "loss": 0.1256, "step": 3957 }, { "epoch": 1.08, "grad_norm": 2.277575887253905, "learning_rate": 7.403678646094602e-06, "loss": 0.1449, "step": 3958 }, { "epoch": 1.08, "grad_norm": 2.257923864518211, "learning_rate": 7.40238632184491e-06, "loss": 0.1316, "step": 3959 }, { "epoch": 1.08, "grad_norm": 2.0746344557535656, "learning_rate": 7.40109378890136e-06, "loss": 0.1217, "step": 3960 }, { "epoch": 1.08, "grad_norm": 1.8769338531399027, "learning_rate": 7.399801047376235e-06, "loss": 0.111, "step": 3961 }, { "epoch": 1.08, "grad_norm": 1.8318698550524606, "learning_rate": 7.398508097381837e-06, "loss": 0.111, "step": 3962 }, { "epoch": 1.08, "grad_norm": 2.1451980967544824, "learning_rate": 7.397214939030479e-06, "loss": 0.1562, "step": 3963 }, { "epoch": 1.08, "grad_norm": 2.44948514918435, "learning_rate": 7.395921572434501e-06, "loss": 0.1159, "step": 3964 }, { "epoch": 1.08, "grad_norm": 1.9230492125242697, "learning_rate": 7.394627997706256e-06, "loss": 0.1095, "step": 3965 }, { "epoch": 1.08, "grad_norm": 1.7568446334639645, "learning_rate": 7.393334214958114e-06, "loss": 0.0903, "step": 3966 }, { "epoch": 1.08, "grad_norm": 1.9733458104791217, "learning_rate": 7.392040224302468e-06, "loss": 0.1214, "step": 3967 }, { "epoch": 1.08, "grad_norm": 2.4210225728514954, "learning_rate": 7.390746025851725e-06, "loss": 0.1368, "step": 3968 }, { "epoch": 1.08, "grad_norm": 1.6787517447564464, "learning_rate": 7.389451619718311e-06, "loss": 0.0943, "step": 3969 }, { "epoch": 1.08, "grad_norm": 2.2614749821578637, "learning_rate": 7.388157006014669e-06, "loss": 0.1329, "step": 3970 }, { "epoch": 1.08, "grad_norm": 2.239093522756117, "learning_rate": 7.386862184853264e-06, "loss": 0.1227, "step": 3971 }, { "epoch": 1.08, "grad_norm": 2.2995402571318095, "learning_rate": 7.3855671563465745e-06, "loss": 0.1351, "step": 3972 }, { "epoch": 1.08, "grad_norm": 1.968195674552456, "learning_rate": 7.3842719206071e-06, "loss": 0.1166, "step": 3973 }, { "epoch": 1.08, "grad_norm": 2.241333931537465, "learning_rate": 7.382976477747357e-06, "loss": 0.1467, "step": 3974 }, { "epoch": 1.09, "grad_norm": 20.921668138100443, "learning_rate": 7.381680827879877e-06, "loss": 0.1516, "step": 3975 }, { "epoch": 1.09, "grad_norm": 1.9626431379187887, "learning_rate": 7.380384971117215e-06, "loss": 0.1199, "step": 3976 }, { "epoch": 1.09, "grad_norm": 2.273551048713562, "learning_rate": 7.379088907571942e-06, "loss": 0.1522, "step": 3977 }, { "epoch": 1.09, "grad_norm": 21.837456878779356, "learning_rate": 7.377792637356644e-06, "loss": 0.1428, "step": 3978 }, { "epoch": 1.09, "grad_norm": 1.833715634757583, "learning_rate": 7.376496160583928e-06, "loss": 0.1185, "step": 3979 }, { "epoch": 1.09, "grad_norm": 2.096116938590306, "learning_rate": 7.3751994773664195e-06, "loss": 0.1303, "step": 3980 }, { "epoch": 1.09, "grad_norm": 2.274181786895908, "learning_rate": 7.373902587816758e-06, "loss": 0.1305, "step": 3981 }, { "epoch": 1.09, "grad_norm": 1.9311979803562314, "learning_rate": 7.372605492047605e-06, "loss": 0.114, "step": 3982 }, { "epoch": 1.09, "grad_norm": 2.129251198279499, "learning_rate": 7.37130819017164e-06, "loss": 0.1392, "step": 3983 }, { "epoch": 1.09, "grad_norm": 2.2371882114083426, "learning_rate": 7.370010682301556e-06, "loss": 0.136, "step": 3984 }, { "epoch": 1.09, "grad_norm": 2.9793736302005662, "learning_rate": 7.368712968550068e-06, "loss": 0.1156, "step": 3985 }, { "epoch": 1.09, "grad_norm": 1.8591369501604358, "learning_rate": 7.367415049029909e-06, "loss": 0.1155, "step": 3986 }, { "epoch": 1.09, "grad_norm": 2.2948435519030217, "learning_rate": 7.3661169238538255e-06, "loss": 0.1251, "step": 3987 }, { "epoch": 1.09, "grad_norm": 1.9748317862585145, "learning_rate": 7.364818593134586e-06, "loss": 0.1225, "step": 3988 }, { "epoch": 1.09, "grad_norm": 2.1696537685157304, "learning_rate": 7.363520056984977e-06, "loss": 0.1463, "step": 3989 }, { "epoch": 1.09, "grad_norm": 2.0301156159973863, "learning_rate": 7.362221315517801e-06, "loss": 0.1226, "step": 3990 }, { "epoch": 1.09, "grad_norm": 2.3661832290675964, "learning_rate": 7.3609223688458775e-06, "loss": 0.1212, "step": 3991 }, { "epoch": 1.09, "grad_norm": 2.1250212628680742, "learning_rate": 7.359623217082047e-06, "loss": 0.1072, "step": 3992 }, { "epoch": 1.09, "grad_norm": 2.0771167817979572, "learning_rate": 7.358323860339165e-06, "loss": 0.1062, "step": 3993 }, { "epoch": 1.09, "grad_norm": 2.121546310168771, "learning_rate": 7.357024298730107e-06, "loss": 0.13, "step": 3994 }, { "epoch": 1.09, "grad_norm": 2.2314031551528086, "learning_rate": 7.355724532367763e-06, "loss": 0.1374, "step": 3995 }, { "epoch": 1.09, "grad_norm": 2.0199067337626078, "learning_rate": 7.354424561365046e-06, "loss": 0.1128, "step": 3996 }, { "epoch": 1.09, "grad_norm": 2.3121695047480118, "learning_rate": 7.35312438583488e-06, "loss": 0.1448, "step": 3997 }, { "epoch": 1.09, "grad_norm": 1.8886272701778675, "learning_rate": 7.351824005890213e-06, "loss": 0.1146, "step": 3998 }, { "epoch": 1.09, "grad_norm": 2.250740809519039, "learning_rate": 7.350523421644008e-06, "loss": 0.1392, "step": 3999 }, { "epoch": 1.09, "grad_norm": 2.1534237343358567, "learning_rate": 7.349222633209246e-06, "loss": 0.1277, "step": 4000 }, { "epoch": 1.09, "grad_norm": 2.079741345723657, "learning_rate": 7.347921640698925e-06, "loss": 0.124, "step": 4001 }, { "epoch": 1.09, "grad_norm": 2.4299492899779693, "learning_rate": 7.3466204442260605e-06, "loss": 0.1246, "step": 4002 }, { "epoch": 1.09, "grad_norm": 2.1011338876661463, "learning_rate": 7.345319043903689e-06, "loss": 0.1111, "step": 4003 }, { "epoch": 1.09, "grad_norm": 1.7390941763705936, "learning_rate": 7.344017439844862e-06, "loss": 0.1093, "step": 4004 }, { "epoch": 1.09, "grad_norm": 2.077205117707561, "learning_rate": 7.342715632162647e-06, "loss": 0.1172, "step": 4005 }, { "epoch": 1.09, "grad_norm": 2.153527928267279, "learning_rate": 7.3414136209701335e-06, "loss": 0.0958, "step": 4006 }, { "epoch": 1.09, "grad_norm": 2.2102528197082947, "learning_rate": 7.340111406380425e-06, "loss": 0.1179, "step": 4007 }, { "epoch": 1.09, "grad_norm": 2.1591957922579565, "learning_rate": 7.338808988506644e-06, "loss": 0.1329, "step": 4008 }, { "epoch": 1.09, "grad_norm": 1.9922621824714042, "learning_rate": 7.337506367461933e-06, "loss": 0.1247, "step": 4009 }, { "epoch": 1.09, "grad_norm": 2.2014104256536773, "learning_rate": 7.336203543359446e-06, "loss": 0.129, "step": 4010 }, { "epoch": 1.1, "grad_norm": 1.8946157551562837, "learning_rate": 7.3349005163123625e-06, "loss": 0.1038, "step": 4011 }, { "epoch": 1.1, "grad_norm": 2.0086623417674847, "learning_rate": 7.333597286433873e-06, "loss": 0.1173, "step": 4012 }, { "epoch": 1.1, "grad_norm": 2.2230856327408817, "learning_rate": 7.33229385383719e-06, "loss": 0.1224, "step": 4013 }, { "epoch": 1.1, "grad_norm": 2.5089897680310016, "learning_rate": 7.330990218635541e-06, "loss": 0.1147, "step": 4014 }, { "epoch": 1.1, "grad_norm": 2.14394160648444, "learning_rate": 7.329686380942172e-06, "loss": 0.1332, "step": 4015 }, { "epoch": 1.1, "grad_norm": 2.2508440942690697, "learning_rate": 7.3283823408703466e-06, "loss": 0.1132, "step": 4016 }, { "epoch": 1.1, "grad_norm": 2.1391547224960457, "learning_rate": 7.327078098533347e-06, "loss": 0.1365, "step": 4017 }, { "epoch": 1.1, "grad_norm": 2.1416846784491317, "learning_rate": 7.3257736540444715e-06, "loss": 0.123, "step": 4018 }, { "epoch": 1.1, "grad_norm": 1.927397974785919, "learning_rate": 7.324469007517035e-06, "loss": 0.1172, "step": 4019 }, { "epoch": 1.1, "grad_norm": 1.801071893702841, "learning_rate": 7.323164159064372e-06, "loss": 0.1252, "step": 4020 }, { "epoch": 1.1, "grad_norm": 2.3330053919857847, "learning_rate": 7.321859108799836e-06, "loss": 0.1234, "step": 4021 }, { "epoch": 1.1, "grad_norm": 2.09285207012069, "learning_rate": 7.320553856836792e-06, "loss": 0.1045, "step": 4022 }, { "epoch": 1.1, "grad_norm": 1.84329052109295, "learning_rate": 7.319248403288629e-06, "loss": 0.1056, "step": 4023 }, { "epoch": 1.1, "grad_norm": 2.11669083553945, "learning_rate": 7.317942748268753e-06, "loss": 0.1342, "step": 4024 }, { "epoch": 1.1, "grad_norm": 2.015587895149446, "learning_rate": 7.31663689189058e-06, "loss": 0.1282, "step": 4025 }, { "epoch": 1.1, "grad_norm": 2.395268345813073, "learning_rate": 7.315330834267553e-06, "loss": 0.1415, "step": 4026 }, { "epoch": 1.1, "grad_norm": 2.0938613208144936, "learning_rate": 7.31402457551313e-06, "loss": 0.123, "step": 4027 }, { "epoch": 1.1, "grad_norm": 1.9680644762034556, "learning_rate": 7.31271811574078e-06, "loss": 0.1211, "step": 4028 }, { "epoch": 1.1, "grad_norm": 2.115640362741935, "learning_rate": 7.311411455063997e-06, "loss": 0.1285, "step": 4029 }, { "epoch": 1.1, "grad_norm": 2.1288514908174747, "learning_rate": 7.31010459359629e-06, "loss": 0.1331, "step": 4030 }, { "epoch": 1.1, "grad_norm": 2.1912549131546237, "learning_rate": 7.308797531451185e-06, "loss": 0.1472, "step": 4031 }, { "epoch": 1.1, "grad_norm": 2.2871869773379374, "learning_rate": 7.307490268742224e-06, "loss": 0.1267, "step": 4032 }, { "epoch": 1.1, "grad_norm": 2.0732219243656513, "learning_rate": 7.306182805582972e-06, "loss": 0.1304, "step": 4033 }, { "epoch": 1.1, "grad_norm": 2.120908086417422, "learning_rate": 7.304875142087005e-06, "loss": 0.1164, "step": 4034 }, { "epoch": 1.1, "grad_norm": 1.9752969660723898, "learning_rate": 7.303567278367918e-06, "loss": 0.1249, "step": 4035 }, { "epoch": 1.1, "grad_norm": 2.305683229994794, "learning_rate": 7.302259214539327e-06, "loss": 0.14, "step": 4036 }, { "epoch": 1.1, "grad_norm": 2.0519892133694357, "learning_rate": 7.300950950714859e-06, "loss": 0.1298, "step": 4037 }, { "epoch": 1.1, "grad_norm": 2.229621746050729, "learning_rate": 7.299642487008166e-06, "loss": 0.1185, "step": 4038 }, { "epoch": 1.1, "grad_norm": 2.1076792448967523, "learning_rate": 7.298333823532913e-06, "loss": 0.1312, "step": 4039 }, { "epoch": 1.1, "grad_norm": 1.8214709736410473, "learning_rate": 7.297024960402779e-06, "loss": 0.1308, "step": 4040 }, { "epoch": 1.1, "grad_norm": 2.099988719820571, "learning_rate": 7.295715897731468e-06, "loss": 0.1289, "step": 4041 }, { "epoch": 1.1, "grad_norm": 1.9086126172107463, "learning_rate": 7.294406635632696e-06, "loss": 0.125, "step": 4042 }, { "epoch": 1.1, "grad_norm": 1.9388755983126214, "learning_rate": 7.293097174220199e-06, "loss": 0.1373, "step": 4043 }, { "epoch": 1.1, "grad_norm": 1.9252420528037315, "learning_rate": 7.291787513607727e-06, "loss": 0.1267, "step": 4044 }, { "epoch": 1.1, "grad_norm": 1.943952817642541, "learning_rate": 7.290477653909052e-06, "loss": 0.1158, "step": 4045 }, { "epoch": 1.1, "grad_norm": 2.186985218661055, "learning_rate": 7.289167595237957e-06, "loss": 0.139, "step": 4046 }, { "epoch": 1.1, "grad_norm": 2.1620383014593028, "learning_rate": 7.28785733770825e-06, "loss": 0.1332, "step": 4047 }, { "epoch": 1.11, "grad_norm": 2.012440181711628, "learning_rate": 7.28654688143375e-06, "loss": 0.1339, "step": 4048 }, { "epoch": 1.11, "grad_norm": 2.0694763750739615, "learning_rate": 7.285236226528297e-06, "loss": 0.1259, "step": 4049 }, { "epoch": 1.11, "grad_norm": 1.879491122272998, "learning_rate": 7.283925373105745e-06, "loss": 0.1021, "step": 4050 }, { "epoch": 1.11, "grad_norm": 2.065723860322678, "learning_rate": 7.282614321279969e-06, "loss": 0.1445, "step": 4051 }, { "epoch": 1.11, "grad_norm": 2.3948578867793446, "learning_rate": 7.281303071164858e-06, "loss": 0.1073, "step": 4052 }, { "epoch": 1.11, "grad_norm": 2.2230764969291266, "learning_rate": 7.279991622874319e-06, "loss": 0.1155, "step": 4053 }, { "epoch": 1.11, "grad_norm": 2.3494294967172453, "learning_rate": 7.278679976522279e-06, "loss": 0.1298, "step": 4054 }, { "epoch": 1.11, "grad_norm": 2.0682955922314874, "learning_rate": 7.277368132222678e-06, "loss": 0.122, "step": 4055 }, { "epoch": 1.11, "grad_norm": 2.1057988091073554, "learning_rate": 7.276056090089475e-06, "loss": 0.1307, "step": 4056 }, { "epoch": 1.11, "grad_norm": 2.066580174324056, "learning_rate": 7.274743850236649e-06, "loss": 0.1206, "step": 4057 }, { "epoch": 1.11, "grad_norm": 2.059543126056794, "learning_rate": 7.273431412778189e-06, "loss": 0.1411, "step": 4058 }, { "epoch": 1.11, "grad_norm": 1.8810898036468067, "learning_rate": 7.272118777828109e-06, "loss": 0.1037, "step": 4059 }, { "epoch": 1.11, "grad_norm": 2.05918874136541, "learning_rate": 7.270805945500436e-06, "loss": 0.1222, "step": 4060 }, { "epoch": 1.11, "grad_norm": 1.9418198299457987, "learning_rate": 7.269492915909214e-06, "loss": 0.0895, "step": 4061 }, { "epoch": 1.11, "grad_norm": 2.0836958443411864, "learning_rate": 7.268179689168507e-06, "loss": 0.1376, "step": 4062 }, { "epoch": 1.11, "grad_norm": 2.16593274620396, "learning_rate": 7.266866265392394e-06, "loss": 0.1376, "step": 4063 }, { "epoch": 1.11, "grad_norm": 2.022972715275929, "learning_rate": 7.265552644694969e-06, "loss": 0.1225, "step": 4064 }, { "epoch": 1.11, "grad_norm": 2.0873612386337173, "learning_rate": 7.264238827190346e-06, "loss": 0.1178, "step": 4065 }, { "epoch": 1.11, "grad_norm": 1.985209312165002, "learning_rate": 7.2629248129926576e-06, "loss": 0.1114, "step": 4066 }, { "epoch": 1.11, "grad_norm": 2.04820684925789, "learning_rate": 7.26161060221605e-06, "loss": 0.1193, "step": 4067 }, { "epoch": 1.11, "grad_norm": 2.174628697363053, "learning_rate": 7.2602961949746886e-06, "loss": 0.1323, "step": 4068 }, { "epoch": 1.11, "grad_norm": 1.8469254903009882, "learning_rate": 7.258981591382756e-06, "loss": 0.1181, "step": 4069 }, { "epoch": 1.11, "grad_norm": 2.190923853319206, "learning_rate": 7.257666791554448e-06, "loss": 0.1345, "step": 4070 }, { "epoch": 1.11, "grad_norm": 2.2819848208376468, "learning_rate": 7.256351795603982e-06, "loss": 0.1449, "step": 4071 }, { "epoch": 1.11, "grad_norm": 2.1012960945037884, "learning_rate": 7.255036603645593e-06, "loss": 0.1231, "step": 4072 }, { "epoch": 1.11, "grad_norm": 1.9111178390492092, "learning_rate": 7.253721215793528e-06, "loss": 0.1205, "step": 4073 }, { "epoch": 1.11, "grad_norm": 1.9868463170310164, "learning_rate": 7.252405632162054e-06, "loss": 0.1326, "step": 4074 }, { "epoch": 1.11, "grad_norm": 1.8498058036585032, "learning_rate": 7.251089852865458e-06, "loss": 0.1111, "step": 4075 }, { "epoch": 1.11, "grad_norm": 3.130595669503501, "learning_rate": 7.2497738780180375e-06, "loss": 0.1496, "step": 4076 }, { "epoch": 1.11, "grad_norm": 1.7243798034060782, "learning_rate": 7.248457707734113e-06, "loss": 0.1065, "step": 4077 }, { "epoch": 1.11, "grad_norm": 2.021871491947951, "learning_rate": 7.247141342128017e-06, "loss": 0.098, "step": 4078 }, { "epoch": 1.11, "grad_norm": 1.8678339379041473, "learning_rate": 7.245824781314104e-06, "loss": 0.1021, "step": 4079 }, { "epoch": 1.11, "grad_norm": 2.1691268925388405, "learning_rate": 7.24450802540674e-06, "loss": 0.1184, "step": 4080 }, { "epoch": 1.11, "grad_norm": 2.0599849185264376, "learning_rate": 7.243191074520314e-06, "loss": 0.1128, "step": 4081 }, { "epoch": 1.11, "grad_norm": 2.397295444122207, "learning_rate": 7.2418739287692266e-06, "loss": 0.1494, "step": 4082 }, { "epoch": 1.11, "grad_norm": 1.8887882863137924, "learning_rate": 7.240556588267897e-06, "loss": 0.1334, "step": 4083 }, { "epoch": 1.11, "grad_norm": 2.0671605015297705, "learning_rate": 7.2392390531307634e-06, "loss": 0.1297, "step": 4084 }, { "epoch": 1.12, "grad_norm": 1.9278132265550376, "learning_rate": 7.237921323472279e-06, "loss": 0.1235, "step": 4085 }, { "epoch": 1.12, "grad_norm": 2.126243458354478, "learning_rate": 7.236603399406914e-06, "loss": 0.1264, "step": 4086 }, { "epoch": 1.12, "grad_norm": 2.073927395193306, "learning_rate": 7.235285281049154e-06, "loss": 0.132, "step": 4087 }, { "epoch": 1.12, "grad_norm": 2.00195967613182, "learning_rate": 7.233966968513506e-06, "loss": 0.1181, "step": 4088 }, { "epoch": 1.12, "grad_norm": 1.9167174467624488, "learning_rate": 7.23264846191449e-06, "loss": 0.1247, "step": 4089 }, { "epoch": 1.12, "grad_norm": 1.8760304689855907, "learning_rate": 7.231329761366642e-06, "loss": 0.1191, "step": 4090 }, { "epoch": 1.12, "grad_norm": 1.9159775973618893, "learning_rate": 7.230010866984518e-06, "loss": 0.1162, "step": 4091 }, { "epoch": 1.12, "grad_norm": 1.9277798756667899, "learning_rate": 7.2286917788826926e-06, "loss": 0.0942, "step": 4092 }, { "epoch": 1.12, "grad_norm": 2.111950398625085, "learning_rate": 7.2273724971757484e-06, "loss": 0.1279, "step": 4093 }, { "epoch": 1.12, "grad_norm": 1.9492437452475793, "learning_rate": 7.226053021978295e-06, "loss": 0.1159, "step": 4094 }, { "epoch": 1.12, "grad_norm": 2.1127674985376426, "learning_rate": 7.2247333534049536e-06, "loss": 0.1219, "step": 4095 }, { "epoch": 1.12, "grad_norm": 2.337096983319986, "learning_rate": 7.2234134915703616e-06, "loss": 0.1232, "step": 4096 }, { "epoch": 1.12, "grad_norm": 2.191416578479189, "learning_rate": 7.222093436589175e-06, "loss": 0.1345, "step": 4097 }, { "epoch": 1.12, "grad_norm": 1.9533956736046576, "learning_rate": 7.220773188576068e-06, "loss": 0.1088, "step": 4098 }, { "epoch": 1.12, "grad_norm": 1.905760269854261, "learning_rate": 7.219452747645728e-06, "loss": 0.1023, "step": 4099 }, { "epoch": 1.12, "grad_norm": 2.3912067840711795, "learning_rate": 7.218132113912859e-06, "loss": 0.1539, "step": 4100 }, { "epoch": 1.12, "grad_norm": 2.210437236899646, "learning_rate": 7.216811287492189e-06, "loss": 0.137, "step": 4101 }, { "epoch": 1.12, "grad_norm": 2.2701933019852327, "learning_rate": 7.215490268498453e-06, "loss": 0.1295, "step": 4102 }, { "epoch": 1.12, "grad_norm": 2.2963509680678045, "learning_rate": 7.2141690570464074e-06, "loss": 0.1415, "step": 4103 }, { "epoch": 1.12, "grad_norm": 1.953002537439698, "learning_rate": 7.212847653250828e-06, "loss": 0.1015, "step": 4104 }, { "epoch": 1.12, "grad_norm": 2.2133252702056136, "learning_rate": 7.211526057226502e-06, "loss": 0.1454, "step": 4105 }, { "epoch": 1.12, "grad_norm": 1.7610991350213612, "learning_rate": 7.2102042690882356e-06, "loss": 0.0934, "step": 4106 }, { "epoch": 1.12, "grad_norm": 2.298450285512435, "learning_rate": 7.208882288950854e-06, "loss": 0.146, "step": 4107 }, { "epoch": 1.12, "grad_norm": 2.237744583653582, "learning_rate": 7.207560116929192e-06, "loss": 0.1247, "step": 4108 }, { "epoch": 1.12, "grad_norm": 1.8205207035711697, "learning_rate": 7.20623775313811e-06, "loss": 0.1116, "step": 4109 }, { "epoch": 1.12, "grad_norm": 1.861896084213048, "learning_rate": 7.204915197692481e-06, "loss": 0.1191, "step": 4110 }, { "epoch": 1.12, "grad_norm": 2.107175259116252, "learning_rate": 7.203592450707193e-06, "loss": 0.1435, "step": 4111 }, { "epoch": 1.12, "grad_norm": 1.717745939394215, "learning_rate": 7.202269512297153e-06, "loss": 0.1128, "step": 4112 }, { "epoch": 1.12, "grad_norm": 2.291724950550943, "learning_rate": 7.200946382577284e-06, "loss": 0.1233, "step": 4113 }, { "epoch": 1.12, "grad_norm": 2.0100059226373426, "learning_rate": 7.199623061662524e-06, "loss": 0.134, "step": 4114 }, { "epoch": 1.12, "grad_norm": 2.061720671735907, "learning_rate": 7.1982995496678306e-06, "loss": 0.1285, "step": 4115 }, { "epoch": 1.12, "grad_norm": 1.9410259925562126, "learning_rate": 7.196975846708176e-06, "loss": 0.1054, "step": 4116 }, { "epoch": 1.12, "grad_norm": 2.0518441549823327, "learning_rate": 7.19565195289855e-06, "loss": 0.1256, "step": 4117 }, { "epoch": 1.12, "grad_norm": 1.7990715197265263, "learning_rate": 7.194327868353958e-06, "loss": 0.0977, "step": 4118 }, { "epoch": 1.12, "grad_norm": 2.2207897239223384, "learning_rate": 7.193003593189423e-06, "loss": 0.122, "step": 4119 }, { "epoch": 1.12, "grad_norm": 2.2162668701755543, "learning_rate": 7.191679127519981e-06, "loss": 0.13, "step": 4120 }, { "epoch": 1.13, "grad_norm": 1.871066645582895, "learning_rate": 7.190354471460692e-06, "loss": 0.1225, "step": 4121 }, { "epoch": 1.13, "grad_norm": 2.104715667461744, "learning_rate": 7.189029625126627e-06, "loss": 0.1234, "step": 4122 }, { "epoch": 1.13, "grad_norm": 2.05382869277293, "learning_rate": 7.187704588632871e-06, "loss": 0.14, "step": 4123 }, { "epoch": 1.13, "grad_norm": 1.9002726339466374, "learning_rate": 7.186379362094533e-06, "loss": 0.0959, "step": 4124 }, { "epoch": 1.13, "grad_norm": 1.8299457765103404, "learning_rate": 7.185053945626734e-06, "loss": 0.1054, "step": 4125 }, { "epoch": 1.13, "grad_norm": 1.9894973271417298, "learning_rate": 7.183728339344611e-06, "loss": 0.1181, "step": 4126 }, { "epoch": 1.13, "grad_norm": 1.9189738564018048, "learning_rate": 7.182402543363319e-06, "loss": 0.1159, "step": 4127 }, { "epoch": 1.13, "grad_norm": 1.9614473738193228, "learning_rate": 7.1810765577980305e-06, "loss": 0.1084, "step": 4128 }, { "epoch": 1.13, "grad_norm": 2.102583468885272, "learning_rate": 7.179750382763931e-06, "loss": 0.1053, "step": 4129 }, { "epoch": 1.13, "grad_norm": 2.0328218705613574, "learning_rate": 7.178424018376224e-06, "loss": 0.1301, "step": 4130 }, { "epoch": 1.13, "grad_norm": 2.066696744552882, "learning_rate": 7.177097464750134e-06, "loss": 0.119, "step": 4131 }, { "epoch": 1.13, "grad_norm": 2.071562043633715, "learning_rate": 7.175770722000893e-06, "loss": 0.1305, "step": 4132 }, { "epoch": 1.13, "grad_norm": 2.294928397577337, "learning_rate": 7.174443790243758e-06, "loss": 0.133, "step": 4133 }, { "epoch": 1.13, "grad_norm": 2.0890502330090364, "learning_rate": 7.173116669593997e-06, "loss": 0.139, "step": 4134 }, { "epoch": 1.13, "grad_norm": 2.1316413776312366, "learning_rate": 7.171789360166896e-06, "loss": 0.1233, "step": 4135 }, { "epoch": 1.13, "grad_norm": 1.9121317906783983, "learning_rate": 7.170461862077759e-06, "loss": 0.1187, "step": 4136 }, { "epoch": 1.13, "grad_norm": 2.1341586131665102, "learning_rate": 7.169134175441904e-06, "loss": 0.129, "step": 4137 }, { "epoch": 1.13, "grad_norm": 2.152178980078153, "learning_rate": 7.167806300374665e-06, "loss": 0.1355, "step": 4138 }, { "epoch": 1.13, "grad_norm": 2.260737540531499, "learning_rate": 7.166478236991396e-06, "loss": 0.144, "step": 4139 }, { "epoch": 1.13, "grad_norm": 2.3243095875135924, "learning_rate": 7.165149985407465e-06, "loss": 0.1214, "step": 4140 }, { "epoch": 1.13, "grad_norm": 1.9983298131638916, "learning_rate": 7.163821545738254e-06, "loss": 0.097, "step": 4141 }, { "epoch": 1.13, "grad_norm": 2.14539639922315, "learning_rate": 7.162492918099167e-06, "loss": 0.1354, "step": 4142 }, { "epoch": 1.13, "grad_norm": 2.073087929254994, "learning_rate": 7.16116410260562e-06, "loss": 0.1284, "step": 4143 }, { "epoch": 1.13, "grad_norm": 1.8304532592972178, "learning_rate": 7.1598350993730435e-06, "loss": 0.1099, "step": 4144 }, { "epoch": 1.13, "grad_norm": 2.290558617469023, "learning_rate": 7.158505908516891e-06, "loss": 0.1211, "step": 4145 }, { "epoch": 1.13, "grad_norm": 1.834973411605714, "learning_rate": 7.157176530152628e-06, "loss": 0.1192, "step": 4146 }, { "epoch": 1.13, "grad_norm": 2.069403813635896, "learning_rate": 7.155846964395734e-06, "loss": 0.1241, "step": 4147 }, { "epoch": 1.13, "grad_norm": 1.9038946446698481, "learning_rate": 7.154517211361709e-06, "loss": 0.1285, "step": 4148 }, { "epoch": 1.13, "grad_norm": 2.1139802214615795, "learning_rate": 7.153187271166071e-06, "loss": 0.1209, "step": 4149 }, { "epoch": 1.13, "grad_norm": 2.5077831189380517, "learning_rate": 7.151857143924345e-06, "loss": 0.1387, "step": 4150 }, { "epoch": 1.13, "grad_norm": 2.2675835021650275, "learning_rate": 7.150526829752085e-06, "loss": 0.1404, "step": 4151 }, { "epoch": 1.13, "grad_norm": 1.8652222644044676, "learning_rate": 7.14919632876485e-06, "loss": 0.1228, "step": 4152 }, { "epoch": 1.13, "grad_norm": 1.8811022419661327, "learning_rate": 7.147865641078221e-06, "loss": 0.1173, "step": 4153 }, { "epoch": 1.13, "grad_norm": 2.1861974372829143, "learning_rate": 7.146534766807794e-06, "loss": 0.1421, "step": 4154 }, { "epoch": 1.13, "grad_norm": 2.0373985670142067, "learning_rate": 7.145203706069183e-06, "loss": 0.1448, "step": 4155 }, { "epoch": 1.13, "grad_norm": 2.0403613502975824, "learning_rate": 7.143872458978013e-06, "loss": 0.1294, "step": 4156 }, { "epoch": 1.13, "grad_norm": 2.0392816974201753, "learning_rate": 7.142541025649932e-06, "loss": 0.128, "step": 4157 }, { "epoch": 1.14, "grad_norm": 1.9652661242245302, "learning_rate": 7.1412094062005985e-06, "loss": 0.1199, "step": 4158 }, { "epoch": 1.14, "grad_norm": 1.969055061144305, "learning_rate": 7.139877600745691e-06, "loss": 0.1145, "step": 4159 }, { "epoch": 1.14, "grad_norm": 1.90720565346627, "learning_rate": 7.138545609400901e-06, "loss": 0.1341, "step": 4160 }, { "epoch": 1.14, "grad_norm": 2.033805183744819, "learning_rate": 7.13721343228194e-06, "loss": 0.1222, "step": 4161 }, { "epoch": 1.14, "grad_norm": 1.7017616871810028, "learning_rate": 7.135881069504531e-06, "loss": 0.0993, "step": 4162 }, { "epoch": 1.14, "grad_norm": 2.003142843982908, "learning_rate": 7.134548521184417e-06, "loss": 0.1227, "step": 4163 }, { "epoch": 1.14, "grad_norm": 1.8475285773033214, "learning_rate": 7.1332157874373565e-06, "loss": 0.1044, "step": 4164 }, { "epoch": 1.14, "grad_norm": 1.9241259271038214, "learning_rate": 7.1318828683791205e-06, "loss": 0.114, "step": 4165 }, { "epoch": 1.14, "grad_norm": 1.873873295806884, "learning_rate": 7.130549764125502e-06, "loss": 0.1159, "step": 4166 }, { "epoch": 1.14, "grad_norm": 2.1965279420717434, "learning_rate": 7.129216474792305e-06, "loss": 0.1238, "step": 4167 }, { "epoch": 1.14, "grad_norm": 2.0584641430052115, "learning_rate": 7.127883000495353e-06, "loss": 0.1282, "step": 4168 }, { "epoch": 1.14, "grad_norm": 2.2082759397085514, "learning_rate": 7.1265493413504815e-06, "loss": 0.1227, "step": 4169 }, { "epoch": 1.14, "grad_norm": 2.0151240493765266, "learning_rate": 7.125215497473548e-06, "loss": 0.1054, "step": 4170 }, { "epoch": 1.14, "grad_norm": 2.000606501275892, "learning_rate": 7.123881468980419e-06, "loss": 0.1216, "step": 4171 }, { "epoch": 1.14, "grad_norm": 2.4278567707426846, "learning_rate": 7.122547255986985e-06, "loss": 0.1339, "step": 4172 }, { "epoch": 1.14, "grad_norm": 2.128830850778144, "learning_rate": 7.121212858609146e-06, "loss": 0.1238, "step": 4173 }, { "epoch": 1.14, "grad_norm": 1.9512308665181886, "learning_rate": 7.119878276962818e-06, "loss": 0.1188, "step": 4174 }, { "epoch": 1.14, "grad_norm": 1.8742164132560155, "learning_rate": 7.11854351116394e-06, "loss": 0.1175, "step": 4175 }, { "epoch": 1.14, "grad_norm": 1.9961944791213952, "learning_rate": 7.11720856132846e-06, "loss": 0.1126, "step": 4176 }, { "epoch": 1.14, "grad_norm": 2.0353395389191196, "learning_rate": 7.115873427572342e-06, "loss": 0.1391, "step": 4177 }, { "epoch": 1.14, "grad_norm": 2.0767031568616745, "learning_rate": 7.114538110011573e-06, "loss": 0.1444, "step": 4178 }, { "epoch": 1.14, "grad_norm": 1.8728855260683073, "learning_rate": 7.1132026087621485e-06, "loss": 0.1254, "step": 4179 }, { "epoch": 1.14, "grad_norm": 2.0147393998570635, "learning_rate": 7.111866923940083e-06, "loss": 0.1159, "step": 4180 }, { "epoch": 1.14, "grad_norm": 2.04122326472132, "learning_rate": 7.110531055661406e-06, "loss": 0.1302, "step": 4181 }, { "epoch": 1.14, "grad_norm": 1.9710831441334116, "learning_rate": 7.109195004042164e-06, "loss": 0.1104, "step": 4182 }, { "epoch": 1.14, "grad_norm": 1.8442436652944234, "learning_rate": 7.10785876919842e-06, "loss": 0.1059, "step": 4183 }, { "epoch": 1.14, "grad_norm": 1.8174687215989198, "learning_rate": 7.106522351246252e-06, "loss": 0.1226, "step": 4184 }, { "epoch": 1.14, "grad_norm": 1.7421700693815974, "learning_rate": 7.105185750301751e-06, "loss": 0.1223, "step": 4185 }, { "epoch": 1.14, "grad_norm": 1.9449522372366252, "learning_rate": 7.10384896648103e-06, "loss": 0.1147, "step": 4186 }, { "epoch": 1.14, "grad_norm": 2.072921319594952, "learning_rate": 7.102511999900213e-06, "loss": 0.12, "step": 4187 }, { "epoch": 1.14, "grad_norm": 1.7985882639390682, "learning_rate": 7.101174850675442e-06, "loss": 0.1165, "step": 4188 }, { "epoch": 1.14, "grad_norm": 1.8685823692413241, "learning_rate": 7.099837518922873e-06, "loss": 0.1167, "step": 4189 }, { "epoch": 1.14, "grad_norm": 1.74136200040315, "learning_rate": 7.098500004758682e-06, "loss": 0.1135, "step": 4190 }, { "epoch": 1.14, "grad_norm": 1.961049470256503, "learning_rate": 7.097162308299055e-06, "loss": 0.1186, "step": 4191 }, { "epoch": 1.14, "grad_norm": 2.078256771796782, "learning_rate": 7.095824429660199e-06, "loss": 0.1431, "step": 4192 }, { "epoch": 1.14, "grad_norm": 1.774420858630228, "learning_rate": 7.094486368958334e-06, "loss": 0.1062, "step": 4193 }, { "epoch": 1.14, "grad_norm": 1.9955226694247632, "learning_rate": 7.093148126309697e-06, "loss": 0.1259, "step": 4194 }, { "epoch": 1.15, "grad_norm": 2.205935054140034, "learning_rate": 7.091809701830539e-06, "loss": 0.1105, "step": 4195 }, { "epoch": 1.15, "grad_norm": 2.3782782663129436, "learning_rate": 7.090471095637129e-06, "loss": 0.1471, "step": 4196 }, { "epoch": 1.15, "grad_norm": 2.173317062903518, "learning_rate": 7.0891323078457505e-06, "loss": 0.1346, "step": 4197 }, { "epoch": 1.15, "grad_norm": 2.295335070599442, "learning_rate": 7.087793338572705e-06, "loss": 0.1523, "step": 4198 }, { "epoch": 1.15, "grad_norm": 2.0479439613412205, "learning_rate": 7.086454187934306e-06, "loss": 0.1195, "step": 4199 }, { "epoch": 1.15, "grad_norm": 2.1312433479321022, "learning_rate": 7.085114856046884e-06, "loss": 0.1355, "step": 4200 }, { "epoch": 1.15, "grad_norm": 1.9229956267624408, "learning_rate": 7.083775343026789e-06, "loss": 0.1167, "step": 4201 }, { "epoch": 1.15, "grad_norm": 2.1150266225406, "learning_rate": 7.082435648990381e-06, "loss": 0.1139, "step": 4202 }, { "epoch": 1.15, "grad_norm": 1.8607211296616, "learning_rate": 7.08109577405404e-06, "loss": 0.128, "step": 4203 }, { "epoch": 1.15, "grad_norm": 1.9579360566153368, "learning_rate": 7.079755718334158e-06, "loss": 0.1396, "step": 4204 }, { "epoch": 1.15, "grad_norm": 1.5958254655223028, "learning_rate": 7.0784154819471484e-06, "loss": 0.1068, "step": 4205 }, { "epoch": 1.15, "grad_norm": 2.370232076423813, "learning_rate": 7.0770750650094335e-06, "loss": 0.1564, "step": 4206 }, { "epoch": 1.15, "grad_norm": 2.0578095908609813, "learning_rate": 7.075734467637454e-06, "loss": 0.1346, "step": 4207 }, { "epoch": 1.15, "grad_norm": 1.7834407060485793, "learning_rate": 7.074393689947671e-06, "loss": 0.1047, "step": 4208 }, { "epoch": 1.15, "grad_norm": 2.00065412560574, "learning_rate": 7.073052732056553e-06, "loss": 0.1205, "step": 4209 }, { "epoch": 1.15, "grad_norm": 1.9288737725165355, "learning_rate": 7.07171159408059e-06, "loss": 0.1154, "step": 4210 }, { "epoch": 1.15, "grad_norm": 2.0338613156759084, "learning_rate": 7.070370276136287e-06, "loss": 0.1514, "step": 4211 }, { "epoch": 1.15, "grad_norm": 2.046183984541071, "learning_rate": 7.06902877834016e-06, "loss": 0.1223, "step": 4212 }, { "epoch": 1.15, "grad_norm": 1.6956898338198543, "learning_rate": 7.0676871008087465e-06, "loss": 0.1038, "step": 4213 }, { "epoch": 1.15, "grad_norm": 1.817554657649637, "learning_rate": 7.066345243658598e-06, "loss": 0.1205, "step": 4214 }, { "epoch": 1.15, "grad_norm": 2.050161417853629, "learning_rate": 7.065003207006278e-06, "loss": 0.1206, "step": 4215 }, { "epoch": 1.15, "grad_norm": 1.8997314485130785, "learning_rate": 7.06366099096837e-06, "loss": 0.0958, "step": 4216 }, { "epoch": 1.15, "grad_norm": 2.2671000738009854, "learning_rate": 7.062318595661475e-06, "loss": 0.1653, "step": 4217 }, { "epoch": 1.15, "grad_norm": 2.061360639217304, "learning_rate": 7.0609760212021994e-06, "loss": 0.1151, "step": 4218 }, { "epoch": 1.15, "grad_norm": 2.102723365197078, "learning_rate": 7.059633267707176e-06, "loss": 0.114, "step": 4219 }, { "epoch": 1.15, "grad_norm": 2.109063012503294, "learning_rate": 7.058290335293048e-06, "loss": 0.128, "step": 4220 }, { "epoch": 1.15, "grad_norm": 2.117995857791581, "learning_rate": 7.056947224076475e-06, "loss": 0.1337, "step": 4221 }, { "epoch": 1.15, "grad_norm": 2.158029791174112, "learning_rate": 7.055603934174132e-06, "loss": 0.1304, "step": 4222 }, { "epoch": 1.15, "grad_norm": 2.0278188098729975, "learning_rate": 7.054260465702712e-06, "loss": 0.13, "step": 4223 }, { "epoch": 1.15, "grad_norm": 2.389617138982808, "learning_rate": 7.052916818778918e-06, "loss": 0.1422, "step": 4224 }, { "epoch": 1.15, "grad_norm": 2.0315604299132906, "learning_rate": 7.051572993519474e-06, "loss": 0.0992, "step": 4225 }, { "epoch": 1.15, "grad_norm": 2.212832764798801, "learning_rate": 7.050228990041117e-06, "loss": 0.1428, "step": 4226 }, { "epoch": 1.15, "grad_norm": 1.8986893222541308, "learning_rate": 7.048884808460599e-06, "loss": 0.1294, "step": 4227 }, { "epoch": 1.15, "grad_norm": 1.9259685203070438, "learning_rate": 7.047540448894687e-06, "loss": 0.1018, "step": 4228 }, { "epoch": 1.15, "grad_norm": 1.7217273337801775, "learning_rate": 7.04619591146017e-06, "loss": 0.1121, "step": 4229 }, { "epoch": 1.15, "grad_norm": 1.8423791308770312, "learning_rate": 7.044851196273841e-06, "loss": 0.1184, "step": 4230 }, { "epoch": 1.16, "grad_norm": 2.0711187783761025, "learning_rate": 7.0435063034525164e-06, "loss": 0.123, "step": 4231 }, { "epoch": 1.16, "grad_norm": 1.95835572032229, "learning_rate": 7.042161233113029e-06, "loss": 0.1407, "step": 4232 }, { "epoch": 1.16, "grad_norm": 1.7439597108143134, "learning_rate": 7.040815985372221e-06, "loss": 0.1195, "step": 4233 }, { "epoch": 1.16, "grad_norm": 1.7258780266423253, "learning_rate": 7.039470560346955e-06, "loss": 0.1067, "step": 4234 }, { "epoch": 1.16, "grad_norm": 2.344276126992682, "learning_rate": 7.038124958154108e-06, "loss": 0.1525, "step": 4235 }, { "epoch": 1.16, "grad_norm": 1.9654926615433947, "learning_rate": 7.036779178910569e-06, "loss": 0.1396, "step": 4236 }, { "epoch": 1.16, "grad_norm": 1.943138094752504, "learning_rate": 7.035433222733246e-06, "loss": 0.1147, "step": 4237 }, { "epoch": 1.16, "grad_norm": 1.9334169758925888, "learning_rate": 7.0340870897390635e-06, "loss": 0.1085, "step": 4238 }, { "epoch": 1.16, "grad_norm": 1.6837298765295836, "learning_rate": 7.032740780044957e-06, "loss": 0.1026, "step": 4239 }, { "epoch": 1.16, "grad_norm": 1.9445648272349414, "learning_rate": 7.031394293767879e-06, "loss": 0.1286, "step": 4240 }, { "epoch": 1.16, "grad_norm": 1.8476097235385962, "learning_rate": 7.030047631024801e-06, "loss": 0.116, "step": 4241 }, { "epoch": 1.16, "grad_norm": 2.2111881567549516, "learning_rate": 7.028700791932703e-06, "loss": 0.1287, "step": 4242 }, { "epoch": 1.16, "grad_norm": 2.1320852057264488, "learning_rate": 7.027353776608587e-06, "loss": 0.1183, "step": 4243 }, { "epoch": 1.16, "grad_norm": 1.9700567037683123, "learning_rate": 7.026006585169467e-06, "loss": 0.1149, "step": 4244 }, { "epoch": 1.16, "grad_norm": 2.3428504867811513, "learning_rate": 7.024659217732372e-06, "loss": 0.1333, "step": 4245 }, { "epoch": 1.16, "grad_norm": 2.080400968766066, "learning_rate": 7.023311674414346e-06, "loss": 0.1295, "step": 4246 }, { "epoch": 1.16, "grad_norm": 2.2448668670048293, "learning_rate": 7.0219639553324525e-06, "loss": 0.1343, "step": 4247 }, { "epoch": 1.16, "grad_norm": 2.0593018432261805, "learning_rate": 7.020616060603765e-06, "loss": 0.124, "step": 4248 }, { "epoch": 1.16, "grad_norm": 1.8586380750129603, "learning_rate": 7.019267990345372e-06, "loss": 0.1195, "step": 4249 }, { "epoch": 1.16, "grad_norm": 2.047150637397853, "learning_rate": 7.017919744674384e-06, "loss": 0.1012, "step": 4250 }, { "epoch": 1.16, "grad_norm": 2.1457479593067754, "learning_rate": 7.016571323707919e-06, "loss": 0.1299, "step": 4251 }, { "epoch": 1.16, "grad_norm": 2.1857211462970683, "learning_rate": 7.0152227275631144e-06, "loss": 0.1335, "step": 4252 }, { "epoch": 1.16, "grad_norm": 1.932837708680595, "learning_rate": 7.013873956357123e-06, "loss": 0.1211, "step": 4253 }, { "epoch": 1.16, "grad_norm": 1.5781721297960525, "learning_rate": 7.0125250102071115e-06, "loss": 0.0911, "step": 4254 }, { "epoch": 1.16, "grad_norm": 1.9521296349302901, "learning_rate": 7.011175889230261e-06, "loss": 0.1222, "step": 4255 }, { "epoch": 1.16, "grad_norm": 2.076519599463524, "learning_rate": 7.009826593543769e-06, "loss": 0.0995, "step": 4256 }, { "epoch": 1.16, "grad_norm": 1.8167851730626865, "learning_rate": 7.008477123264849e-06, "loss": 0.1253, "step": 4257 }, { "epoch": 1.16, "grad_norm": 1.8448176798392104, "learning_rate": 7.007127478510727e-06, "loss": 0.1167, "step": 4258 }, { "epoch": 1.16, "grad_norm": 2.2332308101911242, "learning_rate": 7.005777659398647e-06, "loss": 0.1486, "step": 4259 }, { "epoch": 1.16, "grad_norm": 2.0190051945147904, "learning_rate": 7.004427666045867e-06, "loss": 0.1317, "step": 4260 }, { "epoch": 1.16, "grad_norm": 1.8764438738661249, "learning_rate": 7.00307749856966e-06, "loss": 0.1208, "step": 4261 }, { "epoch": 1.16, "grad_norm": 2.0725838482772905, "learning_rate": 7.001727157087316e-06, "loss": 0.1225, "step": 4262 }, { "epoch": 1.16, "grad_norm": 2.0932489701436228, "learning_rate": 7.0003766417161335e-06, "loss": 0.1161, "step": 4263 }, { "epoch": 1.16, "grad_norm": 2.00441893964883, "learning_rate": 6.999025952573435e-06, "loss": 0.14, "step": 4264 }, { "epoch": 1.16, "grad_norm": 1.846084268613071, "learning_rate": 6.997675089776554e-06, "loss": 0.1287, "step": 4265 }, { "epoch": 1.16, "grad_norm": 1.85172270423492, "learning_rate": 6.9963240534428374e-06, "loss": 0.1022, "step": 4266 }, { "epoch": 1.16, "grad_norm": 2.3668144447478334, "learning_rate": 6.994972843689651e-06, "loss": 0.1612, "step": 4267 }, { "epoch": 1.17, "grad_norm": 1.6711942449879786, "learning_rate": 6.993621460634371e-06, "loss": 0.1033, "step": 4268 }, { "epoch": 1.17, "grad_norm": 1.712624127512383, "learning_rate": 6.992269904394392e-06, "loss": 0.0943, "step": 4269 }, { "epoch": 1.17, "grad_norm": 1.800327483910594, "learning_rate": 6.990918175087124e-06, "loss": 0.1015, "step": 4270 }, { "epoch": 1.17, "grad_norm": 2.0961204019571076, "learning_rate": 6.989566272829989e-06, "loss": 0.1448, "step": 4271 }, { "epoch": 1.17, "grad_norm": 2.054936268423516, "learning_rate": 6.98821419774043e-06, "loss": 0.1341, "step": 4272 }, { "epoch": 1.17, "grad_norm": 1.8976777208083724, "learning_rate": 6.986861949935897e-06, "loss": 0.1069, "step": 4273 }, { "epoch": 1.17, "grad_norm": 2.0912721242077508, "learning_rate": 6.985509529533859e-06, "loss": 0.132, "step": 4274 }, { "epoch": 1.17, "grad_norm": 1.9678223030561028, "learning_rate": 6.984156936651802e-06, "loss": 0.119, "step": 4275 }, { "epoch": 1.17, "grad_norm": 1.7853394273208971, "learning_rate": 6.982804171407225e-06, "loss": 0.1188, "step": 4276 }, { "epoch": 1.17, "grad_norm": 1.7908139763191069, "learning_rate": 6.981451233917639e-06, "loss": 0.1039, "step": 4277 }, { "epoch": 1.17, "grad_norm": 2.084509822331322, "learning_rate": 6.980098124300576e-06, "loss": 0.1206, "step": 4278 }, { "epoch": 1.17, "grad_norm": 2.052934168735636, "learning_rate": 6.978744842673578e-06, "loss": 0.1291, "step": 4279 }, { "epoch": 1.17, "grad_norm": 2.3931468954730373, "learning_rate": 6.977391389154204e-06, "loss": 0.1395, "step": 4280 }, { "epoch": 1.17, "grad_norm": 1.9541633086080412, "learning_rate": 6.9760377638600295e-06, "loss": 0.1124, "step": 4281 }, { "epoch": 1.17, "grad_norm": 1.868302049787096, "learning_rate": 6.974683966908642e-06, "loss": 0.1187, "step": 4282 }, { "epoch": 1.17, "grad_norm": 1.9710634722054383, "learning_rate": 6.973329998417643e-06, "loss": 0.1093, "step": 4283 }, { "epoch": 1.17, "grad_norm": 2.1819134685202495, "learning_rate": 6.971975858504653e-06, "loss": 0.1345, "step": 4284 }, { "epoch": 1.17, "grad_norm": 1.9557615338724021, "learning_rate": 6.970621547287306e-06, "loss": 0.1051, "step": 4285 }, { "epoch": 1.17, "grad_norm": 2.0865632291687874, "learning_rate": 6.969267064883247e-06, "loss": 0.1213, "step": 4286 }, { "epoch": 1.17, "grad_norm": 2.0096697189694095, "learning_rate": 6.967912411410143e-06, "loss": 0.1279, "step": 4287 }, { "epoch": 1.17, "grad_norm": 1.897429747021453, "learning_rate": 6.966557586985671e-06, "loss": 0.1304, "step": 4288 }, { "epoch": 1.17, "grad_norm": 1.6844514227148697, "learning_rate": 6.965202591727521e-06, "loss": 0.0887, "step": 4289 }, { "epoch": 1.17, "grad_norm": 1.938360386958679, "learning_rate": 6.9638474257534025e-06, "loss": 0.1354, "step": 4290 }, { "epoch": 1.17, "grad_norm": 2.047576512757608, "learning_rate": 6.96249208918104e-06, "loss": 0.1319, "step": 4291 }, { "epoch": 1.17, "grad_norm": 1.8197269478309723, "learning_rate": 6.961136582128167e-06, "loss": 0.0974, "step": 4292 }, { "epoch": 1.17, "grad_norm": 1.9062555607494636, "learning_rate": 6.959780904712538e-06, "loss": 0.1255, "step": 4293 }, { "epoch": 1.17, "grad_norm": 2.0897857656234136, "learning_rate": 6.95842505705192e-06, "loss": 0.1296, "step": 4294 }, { "epoch": 1.17, "grad_norm": 2.0633233737730983, "learning_rate": 6.957069039264093e-06, "loss": 0.1415, "step": 4295 }, { "epoch": 1.17, "grad_norm": 2.231369399217659, "learning_rate": 6.9557128514668535e-06, "loss": 0.142, "step": 4296 }, { "epoch": 1.17, "grad_norm": 1.9044511720153494, "learning_rate": 6.954356493778016e-06, "loss": 0.1223, "step": 4297 }, { "epoch": 1.17, "grad_norm": 1.9876611952437309, "learning_rate": 6.952999966315402e-06, "loss": 0.139, "step": 4298 }, { "epoch": 1.17, "grad_norm": 1.6674830669223852, "learning_rate": 6.951643269196855e-06, "loss": 0.1068, "step": 4299 }, { "epoch": 1.17, "grad_norm": 1.9194246382596751, "learning_rate": 6.950286402540231e-06, "loss": 0.1266, "step": 4300 }, { "epoch": 1.17, "grad_norm": 1.879631995162897, "learning_rate": 6.948929366463397e-06, "loss": 0.1114, "step": 4301 }, { "epoch": 1.17, "grad_norm": 1.7327504748182798, "learning_rate": 6.94757216108424e-06, "loss": 0.0962, "step": 4302 }, { "epoch": 1.17, "grad_norm": 1.9077091626100586, "learning_rate": 6.9462147865206616e-06, "loss": 0.125, "step": 4303 }, { "epoch": 1.17, "grad_norm": 2.7212628925580487, "learning_rate": 6.944857242890573e-06, "loss": 0.1327, "step": 4304 }, { "epoch": 1.18, "grad_norm": 2.0529262741899923, "learning_rate": 6.943499530311903e-06, "loss": 0.115, "step": 4305 }, { "epoch": 1.18, "grad_norm": 1.5875285032245656, "learning_rate": 6.942141648902599e-06, "loss": 0.0977, "step": 4306 }, { "epoch": 1.18, "grad_norm": 2.02722199605142, "learning_rate": 6.940783598780613e-06, "loss": 0.126, "step": 4307 }, { "epoch": 1.18, "grad_norm": 2.0379055128484453, "learning_rate": 6.939425380063924e-06, "loss": 0.1193, "step": 4308 }, { "epoch": 1.18, "grad_norm": 2.0838819666108805, "learning_rate": 6.938066992870519e-06, "loss": 0.1246, "step": 4309 }, { "epoch": 1.18, "grad_norm": 2.1960063506491663, "learning_rate": 6.936708437318397e-06, "loss": 0.1393, "step": 4310 }, { "epoch": 1.18, "grad_norm": 1.7414271553138678, "learning_rate": 6.935349713525577e-06, "loss": 0.1023, "step": 4311 }, { "epoch": 1.18, "grad_norm": 1.9884633759299402, "learning_rate": 6.93399082161009e-06, "loss": 0.1168, "step": 4312 }, { "epoch": 1.18, "grad_norm": 1.9144685669895434, "learning_rate": 6.932631761689982e-06, "loss": 0.1052, "step": 4313 }, { "epoch": 1.18, "grad_norm": 1.8654138829745965, "learning_rate": 6.931272533883313e-06, "loss": 0.11, "step": 4314 }, { "epoch": 1.18, "grad_norm": 2.049301376570788, "learning_rate": 6.929913138308162e-06, "loss": 0.1103, "step": 4315 }, { "epoch": 1.18, "grad_norm": 2.061390570549711, "learning_rate": 6.928553575082615e-06, "loss": 0.1312, "step": 4316 }, { "epoch": 1.18, "grad_norm": 1.9287779484180387, "learning_rate": 6.927193844324777e-06, "loss": 0.1078, "step": 4317 }, { "epoch": 1.18, "grad_norm": 2.231122509242185, "learning_rate": 6.925833946152769e-06, "loss": 0.144, "step": 4318 }, { "epoch": 1.18, "grad_norm": 2.0769626714806115, "learning_rate": 6.924473880684721e-06, "loss": 0.11, "step": 4319 }, { "epoch": 1.18, "grad_norm": 2.479447013210604, "learning_rate": 6.923113648038784e-06, "loss": 0.1532, "step": 4320 }, { "epoch": 1.18, "grad_norm": 2.0848371895590168, "learning_rate": 6.921753248333122e-06, "loss": 0.1185, "step": 4321 }, { "epoch": 1.18, "grad_norm": 2.4081735322733953, "learning_rate": 6.920392681685908e-06, "loss": 0.1547, "step": 4322 }, { "epoch": 1.18, "grad_norm": 2.3946156723427046, "learning_rate": 6.919031948215335e-06, "loss": 0.1475, "step": 4323 }, { "epoch": 1.18, "grad_norm": 1.8642727007928703, "learning_rate": 6.917671048039611e-06, "loss": 0.1212, "step": 4324 }, { "epoch": 1.18, "grad_norm": 2.0712212641651067, "learning_rate": 6.916309981276954e-06, "loss": 0.1294, "step": 4325 }, { "epoch": 1.18, "grad_norm": 2.204484927308151, "learning_rate": 6.9149487480456e-06, "loss": 0.1312, "step": 4326 }, { "epoch": 1.18, "grad_norm": 1.7955964672117872, "learning_rate": 6.913587348463802e-06, "loss": 0.1106, "step": 4327 }, { "epoch": 1.18, "grad_norm": 1.8421985067183215, "learning_rate": 6.912225782649818e-06, "loss": 0.1175, "step": 4328 }, { "epoch": 1.18, "grad_norm": 1.6671289885611584, "learning_rate": 6.910864050721928e-06, "loss": 0.1006, "step": 4329 }, { "epoch": 1.18, "grad_norm": 1.891298841138614, "learning_rate": 6.909502152798428e-06, "loss": 0.1287, "step": 4330 }, { "epoch": 1.18, "grad_norm": 1.9311647863765549, "learning_rate": 6.908140088997623e-06, "loss": 0.1229, "step": 4331 }, { "epoch": 1.18, "grad_norm": 1.6658519157331932, "learning_rate": 6.906777859437835e-06, "loss": 0.0829, "step": 4332 }, { "epoch": 1.18, "grad_norm": 1.8796674805603164, "learning_rate": 6.9054154642374e-06, "loss": 0.1084, "step": 4333 }, { "epoch": 1.18, "grad_norm": 1.7905634001115056, "learning_rate": 6.904052903514668e-06, "loss": 0.1046, "step": 4334 }, { "epoch": 1.18, "grad_norm": 2.051185888688871, "learning_rate": 6.902690177388003e-06, "loss": 0.1261, "step": 4335 }, { "epoch": 1.18, "grad_norm": 1.9951267917742708, "learning_rate": 6.901327285975787e-06, "loss": 0.1299, "step": 4336 }, { "epoch": 1.18, "grad_norm": 1.931504460038208, "learning_rate": 6.899964229396412e-06, "loss": 0.1252, "step": 4337 }, { "epoch": 1.18, "grad_norm": 2.114802184147079, "learning_rate": 6.898601007768285e-06, "loss": 0.1236, "step": 4338 }, { "epoch": 1.18, "grad_norm": 2.174600757371958, "learning_rate": 6.897237621209831e-06, "loss": 0.121, "step": 4339 }, { "epoch": 1.18, "grad_norm": 2.1075543090933153, "learning_rate": 6.8958740698394835e-06, "loss": 0.1432, "step": 4340 }, { "epoch": 1.19, "grad_norm": 1.9116202887235025, "learning_rate": 6.894510353775694e-06, "loss": 0.1295, "step": 4341 }, { "epoch": 1.19, "grad_norm": 1.970827893263525, "learning_rate": 6.89314647313693e-06, "loss": 0.1131, "step": 4342 }, { "epoch": 1.19, "grad_norm": 2.1604009117312084, "learning_rate": 6.891782428041668e-06, "loss": 0.1326, "step": 4343 }, { "epoch": 1.19, "grad_norm": 1.9018141443868395, "learning_rate": 6.890418218608403e-06, "loss": 0.1079, "step": 4344 }, { "epoch": 1.19, "grad_norm": 2.077017323668445, "learning_rate": 6.889053844955644e-06, "loss": 0.1355, "step": 4345 }, { "epoch": 1.19, "grad_norm": 1.8279842796933703, "learning_rate": 6.887689307201911e-06, "loss": 0.1157, "step": 4346 }, { "epoch": 1.19, "grad_norm": 1.9804722832426336, "learning_rate": 6.886324605465744e-06, "loss": 0.1426, "step": 4347 }, { "epoch": 1.19, "grad_norm": 1.6854976998558286, "learning_rate": 6.884959739865691e-06, "loss": 0.1068, "step": 4348 }, { "epoch": 1.19, "grad_norm": 2.271612163597498, "learning_rate": 6.883594710520317e-06, "loss": 0.159, "step": 4349 }, { "epoch": 1.19, "grad_norm": 1.9124582695880326, "learning_rate": 6.8822295175482024e-06, "loss": 0.1106, "step": 4350 }, { "epoch": 1.19, "grad_norm": 1.8436492715727826, "learning_rate": 6.880864161067942e-06, "loss": 0.1147, "step": 4351 }, { "epoch": 1.19, "grad_norm": 1.8838711494505864, "learning_rate": 6.879498641198141e-06, "loss": 0.1101, "step": 4352 }, { "epoch": 1.19, "grad_norm": 1.8680932854255603, "learning_rate": 6.878132958057422e-06, "loss": 0.127, "step": 4353 }, { "epoch": 1.19, "grad_norm": 1.8755132497585303, "learning_rate": 6.876767111764422e-06, "loss": 0.1215, "step": 4354 }, { "epoch": 1.19, "grad_norm": 2.151839554497143, "learning_rate": 6.87540110243779e-06, "loss": 0.1451, "step": 4355 }, { "epoch": 1.19, "grad_norm": 1.7696004789834525, "learning_rate": 6.874034930196191e-06, "loss": 0.0976, "step": 4356 }, { "epoch": 1.19, "grad_norm": 1.9009103584189615, "learning_rate": 6.872668595158304e-06, "loss": 0.1304, "step": 4357 }, { "epoch": 1.19, "grad_norm": 1.7888060217500739, "learning_rate": 6.87130209744282e-06, "loss": 0.116, "step": 4358 }, { "epoch": 1.19, "grad_norm": 2.172053258311761, "learning_rate": 6.869935437168449e-06, "loss": 0.1382, "step": 4359 }, { "epoch": 1.19, "grad_norm": 1.8267618551655687, "learning_rate": 6.86856861445391e-06, "loss": 0.1185, "step": 4360 }, { "epoch": 1.19, "grad_norm": 2.108704095534012, "learning_rate": 6.867201629417937e-06, "loss": 0.1475, "step": 4361 }, { "epoch": 1.19, "grad_norm": 2.2745701063767436, "learning_rate": 6.865834482179279e-06, "loss": 0.1637, "step": 4362 }, { "epoch": 1.19, "grad_norm": 2.0638497492061103, "learning_rate": 6.864467172856703e-06, "loss": 0.1155, "step": 4363 }, { "epoch": 1.19, "grad_norm": 2.100832139470805, "learning_rate": 6.863099701568982e-06, "loss": 0.1225, "step": 4364 }, { "epoch": 1.19, "grad_norm": 2.046648632952665, "learning_rate": 6.8617320684349105e-06, "loss": 0.1383, "step": 4365 }, { "epoch": 1.19, "grad_norm": 2.181008204570768, "learning_rate": 6.860364273573292e-06, "loss": 0.1141, "step": 4366 }, { "epoch": 1.19, "grad_norm": 1.9691299787988519, "learning_rate": 6.8589963171029475e-06, "loss": 0.1042, "step": 4367 }, { "epoch": 1.19, "grad_norm": 1.7986259939742626, "learning_rate": 6.85762819914271e-06, "loss": 0.1121, "step": 4368 }, { "epoch": 1.19, "grad_norm": 2.14673264435981, "learning_rate": 6.856259919811427e-06, "loss": 0.1255, "step": 4369 }, { "epoch": 1.19, "grad_norm": 2.0077026238615208, "learning_rate": 6.854891479227959e-06, "loss": 0.1286, "step": 4370 }, { "epoch": 1.19, "grad_norm": 2.0601838545423297, "learning_rate": 6.853522877511184e-06, "loss": 0.1332, "step": 4371 }, { "epoch": 1.19, "grad_norm": 2.2314633703606894, "learning_rate": 6.85215411477999e-06, "loss": 0.1392, "step": 4372 }, { "epoch": 1.19, "grad_norm": 2.1076916919598023, "learning_rate": 6.85078519115328e-06, "loss": 0.1181, "step": 4373 }, { "epoch": 1.19, "grad_norm": 2.143598607788687, "learning_rate": 6.849416106749973e-06, "loss": 0.1307, "step": 4374 }, { "epoch": 1.19, "grad_norm": 1.9671264914126156, "learning_rate": 6.8480468616889994e-06, "loss": 0.1325, "step": 4375 }, { "epoch": 1.19, "grad_norm": 1.8964870655971007, "learning_rate": 6.846677456089305e-06, "loss": 0.1318, "step": 4376 }, { "epoch": 1.19, "grad_norm": 1.9348104393709211, "learning_rate": 6.845307890069851e-06, "loss": 0.128, "step": 4377 }, { "epoch": 1.2, "grad_norm": 7.285546609484436, "learning_rate": 6.843938163749608e-06, "loss": 0.1333, "step": 4378 }, { "epoch": 1.2, "grad_norm": 1.6462569039836328, "learning_rate": 6.842568277247564e-06, "loss": 0.1097, "step": 4379 }, { "epoch": 1.2, "grad_norm": 2.022182533112036, "learning_rate": 6.841198230682723e-06, "loss": 0.1363, "step": 4380 }, { "epoch": 1.2, "grad_norm": 2.074507769633965, "learning_rate": 6.839828024174096e-06, "loss": 0.135, "step": 4381 }, { "epoch": 1.2, "grad_norm": 2.1442307452808556, "learning_rate": 6.838457657840715e-06, "loss": 0.1235, "step": 4382 }, { "epoch": 1.2, "grad_norm": 1.9095702914900408, "learning_rate": 6.837087131801622e-06, "loss": 0.1121, "step": 4383 }, { "epoch": 1.2, "grad_norm": 2.34595380191112, "learning_rate": 6.835716446175872e-06, "loss": 0.1263, "step": 4384 }, { "epoch": 1.2, "grad_norm": 2.1312092270184104, "learning_rate": 6.834345601082538e-06, "loss": 0.1251, "step": 4385 }, { "epoch": 1.2, "grad_norm": 1.8315791869446716, "learning_rate": 6.832974596640704e-06, "loss": 0.1067, "step": 4386 }, { "epoch": 1.2, "grad_norm": 2.132148903617483, "learning_rate": 6.831603432969468e-06, "loss": 0.1418, "step": 4387 }, { "epoch": 1.2, "grad_norm": 2.2407310624758154, "learning_rate": 6.830232110187942e-06, "loss": 0.1514, "step": 4388 }, { "epoch": 1.2, "grad_norm": 2.046107561092263, "learning_rate": 6.8288606284152535e-06, "loss": 0.1273, "step": 4389 }, { "epoch": 1.2, "grad_norm": 2.016467522782691, "learning_rate": 6.827488987770539e-06, "loss": 0.109, "step": 4390 }, { "epoch": 1.2, "grad_norm": 2.0202312089343706, "learning_rate": 6.826117188372956e-06, "loss": 0.1171, "step": 4391 }, { "epoch": 1.2, "grad_norm": 2.083756879788876, "learning_rate": 6.824745230341669e-06, "loss": 0.1362, "step": 4392 }, { "epoch": 1.2, "grad_norm": 2.013940985614311, "learning_rate": 6.82337311379586e-06, "loss": 0.1114, "step": 4393 }, { "epoch": 1.2, "grad_norm": 2.100516784447122, "learning_rate": 6.822000838854724e-06, "loss": 0.1347, "step": 4394 }, { "epoch": 1.2, "grad_norm": 1.8578601927901723, "learning_rate": 6.82062840563747e-06, "loss": 0.0915, "step": 4395 }, { "epoch": 1.2, "grad_norm": 1.8780681222509128, "learning_rate": 6.8192558142633215e-06, "loss": 0.1069, "step": 4396 }, { "epoch": 1.2, "grad_norm": 2.0061680788538667, "learning_rate": 6.817883064851511e-06, "loss": 0.1201, "step": 4397 }, { "epoch": 1.2, "grad_norm": 1.9248814938532648, "learning_rate": 6.816510157521295e-06, "loss": 0.1147, "step": 4398 }, { "epoch": 1.2, "grad_norm": 2.539928457189343, "learning_rate": 6.815137092391929e-06, "loss": 0.1121, "step": 4399 }, { "epoch": 1.2, "grad_norm": 2.0343832360752128, "learning_rate": 6.813763869582694e-06, "loss": 0.1396, "step": 4400 }, { "epoch": 1.2, "grad_norm": 1.8321513543446084, "learning_rate": 6.812390489212885e-06, "loss": 0.101, "step": 4401 }, { "epoch": 1.2, "grad_norm": 1.9640048305537825, "learning_rate": 6.811016951401801e-06, "loss": 0.1016, "step": 4402 }, { "epoch": 1.2, "grad_norm": 1.9505712477330828, "learning_rate": 6.809643256268762e-06, "loss": 0.1119, "step": 4403 }, { "epoch": 1.2, "grad_norm": 2.193291071221819, "learning_rate": 6.8082694039331006e-06, "loss": 0.1083, "step": 4404 }, { "epoch": 1.2, "grad_norm": 2.315161476127364, "learning_rate": 6.806895394514163e-06, "loss": 0.1393, "step": 4405 }, { "epoch": 1.2, "grad_norm": 1.6867695977023893, "learning_rate": 6.8055212281313086e-06, "loss": 0.1106, "step": 4406 }, { "epoch": 1.2, "grad_norm": 2.433538020006179, "learning_rate": 6.80414690490391e-06, "loss": 0.1654, "step": 4407 }, { "epoch": 1.2, "grad_norm": 2.1369109692827317, "learning_rate": 6.802772424951353e-06, "loss": 0.1238, "step": 4408 }, { "epoch": 1.2, "grad_norm": 2.7254889286027804, "learning_rate": 6.801397788393038e-06, "loss": 0.1445, "step": 4409 }, { "epoch": 1.2, "grad_norm": 2.0018589656865635, "learning_rate": 6.800022995348381e-06, "loss": 0.1409, "step": 4410 }, { "epoch": 1.2, "grad_norm": 2.2430094240657827, "learning_rate": 6.798648045936807e-06, "loss": 0.1226, "step": 4411 }, { "epoch": 1.2, "grad_norm": 1.6854282652407255, "learning_rate": 6.797272940277757e-06, "loss": 0.0996, "step": 4412 }, { "epoch": 1.2, "grad_norm": 1.9272242398541077, "learning_rate": 6.795897678490689e-06, "loss": 0.1112, "step": 4413 }, { "epoch": 1.21, "grad_norm": 1.9220261086832267, "learning_rate": 6.7945222606950665e-06, "loss": 0.1326, "step": 4414 }, { "epoch": 1.21, "grad_norm": 2.0466058212255747, "learning_rate": 6.7931466870103735e-06, "loss": 0.1157, "step": 4415 }, { "epoch": 1.21, "grad_norm": 1.8142705250918703, "learning_rate": 6.791770957556106e-06, "loss": 0.0968, "step": 4416 }, { "epoch": 1.21, "grad_norm": 1.9916848763314907, "learning_rate": 6.790395072451772e-06, "loss": 0.1441, "step": 4417 }, { "epoch": 1.21, "grad_norm": 2.048626836698824, "learning_rate": 6.789019031816893e-06, "loss": 0.127, "step": 4418 }, { "epoch": 1.21, "grad_norm": 1.8412008101757644, "learning_rate": 6.787642835771006e-06, "loss": 0.118, "step": 4419 }, { "epoch": 1.21, "grad_norm": 1.9521555072990087, "learning_rate": 6.78626648443366e-06, "loss": 0.1051, "step": 4420 }, { "epoch": 1.21, "grad_norm": 2.285612216585969, "learning_rate": 6.7848899779244175e-06, "loss": 0.1526, "step": 4421 }, { "epoch": 1.21, "grad_norm": 1.9731235622128136, "learning_rate": 6.783513316362855e-06, "loss": 0.1345, "step": 4422 }, { "epoch": 1.21, "grad_norm": 1.9474456606035975, "learning_rate": 6.782136499868562e-06, "loss": 0.1262, "step": 4423 }, { "epoch": 1.21, "grad_norm": 2.248825170734204, "learning_rate": 6.7807595285611425e-06, "loss": 0.1347, "step": 4424 }, { "epoch": 1.21, "grad_norm": 1.8434529464239122, "learning_rate": 6.7793824025602125e-06, "loss": 0.1054, "step": 4425 }, { "epoch": 1.21, "grad_norm": 1.8063599054158566, "learning_rate": 6.778005121985403e-06, "loss": 0.1093, "step": 4426 }, { "epoch": 1.21, "grad_norm": 2.01343133286733, "learning_rate": 6.776627686956354e-06, "loss": 0.1213, "step": 4427 }, { "epoch": 1.21, "grad_norm": 1.9929040654187302, "learning_rate": 6.775250097592728e-06, "loss": 0.1255, "step": 4428 }, { "epoch": 1.21, "grad_norm": 2.031601448213597, "learning_rate": 6.773872354014193e-06, "loss": 0.1236, "step": 4429 }, { "epoch": 1.21, "grad_norm": 1.8493151409399733, "learning_rate": 6.77249445634043e-06, "loss": 0.1272, "step": 4430 }, { "epoch": 1.21, "grad_norm": 1.9904998016818611, "learning_rate": 6.77111640469114e-06, "loss": 0.108, "step": 4431 }, { "epoch": 1.21, "grad_norm": 2.2517350383159638, "learning_rate": 6.769738199186031e-06, "loss": 0.1503, "step": 4432 }, { "epoch": 1.21, "grad_norm": 1.7866866698849637, "learning_rate": 6.768359839944829e-06, "loss": 0.0985, "step": 4433 }, { "epoch": 1.21, "grad_norm": 1.9664737664433554, "learning_rate": 6.766981327087271e-06, "loss": 0.1319, "step": 4434 }, { "epoch": 1.21, "grad_norm": 2.2746306981543842, "learning_rate": 6.765602660733105e-06, "loss": 0.1207, "step": 4435 }, { "epoch": 1.21, "grad_norm": 2.1991484990747083, "learning_rate": 6.764223841002096e-06, "loss": 0.1282, "step": 4436 }, { "epoch": 1.21, "grad_norm": 2.0863456919717156, "learning_rate": 6.762844868014025e-06, "loss": 0.1262, "step": 4437 }, { "epoch": 1.21, "grad_norm": 2.1938037174378966, "learning_rate": 6.761465741888678e-06, "loss": 0.1008, "step": 4438 }, { "epoch": 1.21, "grad_norm": 2.350310303829384, "learning_rate": 6.760086462745858e-06, "loss": 0.1247, "step": 4439 }, { "epoch": 1.21, "grad_norm": 1.7927045497688106, "learning_rate": 6.758707030705387e-06, "loss": 0.115, "step": 4440 }, { "epoch": 1.21, "grad_norm": 2.122607765706097, "learning_rate": 6.757327445887092e-06, "loss": 0.123, "step": 4441 }, { "epoch": 1.21, "grad_norm": 1.9416306853427787, "learning_rate": 6.7559477084108184e-06, "loss": 0.1189, "step": 4442 }, { "epoch": 1.21, "grad_norm": 2.3198364360589645, "learning_rate": 6.754567818396423e-06, "loss": 0.1346, "step": 4443 }, { "epoch": 1.21, "grad_norm": 2.263072512603556, "learning_rate": 6.753187775963773e-06, "loss": 0.1186, "step": 4444 }, { "epoch": 1.21, "grad_norm": 1.831689006643091, "learning_rate": 6.751807581232754e-06, "loss": 0.1031, "step": 4445 }, { "epoch": 1.21, "grad_norm": 2.3303477428587, "learning_rate": 6.750427234323266e-06, "loss": 0.1554, "step": 4446 }, { "epoch": 1.21, "grad_norm": 2.4342204433425465, "learning_rate": 6.749046735355213e-06, "loss": 0.1384, "step": 4447 }, { "epoch": 1.21, "grad_norm": 1.968998918772651, "learning_rate": 6.7476660844485234e-06, "loss": 0.1156, "step": 4448 }, { "epoch": 1.21, "grad_norm": 1.9961313538282164, "learning_rate": 6.746285281723129e-06, "loss": 0.1051, "step": 4449 }, { "epoch": 1.21, "grad_norm": 1.9522877411661375, "learning_rate": 6.744904327298982e-06, "loss": 0.1145, "step": 4450 }, { "epoch": 1.22, "grad_norm": 1.8731526581015785, "learning_rate": 6.743523221296044e-06, "loss": 0.1198, "step": 4451 }, { "epoch": 1.22, "grad_norm": 2.121210828299572, "learning_rate": 6.742141963834294e-06, "loss": 0.1237, "step": 4452 }, { "epoch": 1.22, "grad_norm": 2.270173295214354, "learning_rate": 6.740760555033715e-06, "loss": 0.1334, "step": 4453 }, { "epoch": 1.22, "grad_norm": 1.730138755628579, "learning_rate": 6.739378995014314e-06, "loss": 0.1126, "step": 4454 }, { "epoch": 1.22, "grad_norm": 2.0201315626688863, "learning_rate": 6.737997283896104e-06, "loss": 0.1307, "step": 4455 }, { "epoch": 1.22, "grad_norm": 1.8924361386075295, "learning_rate": 6.7366154217991145e-06, "loss": 0.1302, "step": 4456 }, { "epoch": 1.22, "grad_norm": 1.999772751425837, "learning_rate": 6.735233408843387e-06, "loss": 0.1156, "step": 4457 }, { "epoch": 1.22, "grad_norm": 1.7814555281989872, "learning_rate": 6.7338512451489745e-06, "loss": 0.1153, "step": 4458 }, { "epoch": 1.22, "grad_norm": 1.776911393434251, "learning_rate": 6.732468930835947e-06, "loss": 0.101, "step": 4459 }, { "epoch": 1.22, "grad_norm": 2.2107846725920846, "learning_rate": 6.731086466024386e-06, "loss": 0.1445, "step": 4460 }, { "epoch": 1.22, "grad_norm": 1.9312449991394116, "learning_rate": 6.729703850834381e-06, "loss": 0.1288, "step": 4461 }, { "epoch": 1.22, "grad_norm": 2.3165352765514315, "learning_rate": 6.728321085386043e-06, "loss": 0.1365, "step": 4462 }, { "epoch": 1.22, "grad_norm": 1.6251709154956455, "learning_rate": 6.726938169799492e-06, "loss": 0.1023, "step": 4463 }, { "epoch": 1.22, "grad_norm": 2.1837299822614273, "learning_rate": 6.725555104194858e-06, "loss": 0.1313, "step": 4464 }, { "epoch": 1.22, "grad_norm": 1.8792243099198833, "learning_rate": 6.724171888692288e-06, "loss": 0.1146, "step": 4465 }, { "epoch": 1.22, "grad_norm": 2.0122090309674374, "learning_rate": 6.722788523411945e-06, "loss": 0.1173, "step": 4466 }, { "epoch": 1.22, "grad_norm": 2.0961079441183745, "learning_rate": 6.7214050084739955e-06, "loss": 0.1271, "step": 4467 }, { "epoch": 1.22, "grad_norm": 2.350865763792726, "learning_rate": 6.720021343998627e-06, "loss": 0.1434, "step": 4468 }, { "epoch": 1.22, "grad_norm": 1.8461547466411057, "learning_rate": 6.71863753010604e-06, "loss": 0.1261, "step": 4469 }, { "epoch": 1.22, "grad_norm": 1.8143747731356448, "learning_rate": 6.717253566916442e-06, "loss": 0.0841, "step": 4470 }, { "epoch": 1.22, "grad_norm": 2.335596706648105, "learning_rate": 6.715869454550057e-06, "loss": 0.1407, "step": 4471 }, { "epoch": 1.22, "grad_norm": 2.0746353073848813, "learning_rate": 6.714485193127126e-06, "loss": 0.1265, "step": 4472 }, { "epoch": 1.22, "grad_norm": 2.1667535795163753, "learning_rate": 6.713100782767894e-06, "loss": 0.1102, "step": 4473 }, { "epoch": 1.22, "grad_norm": 1.962765805030947, "learning_rate": 6.711716223592628e-06, "loss": 0.1264, "step": 4474 }, { "epoch": 1.22, "grad_norm": 1.8180298215104513, "learning_rate": 6.710331515721602e-06, "loss": 0.1018, "step": 4475 }, { "epoch": 1.22, "grad_norm": 1.96033300282247, "learning_rate": 6.708946659275104e-06, "loss": 0.1144, "step": 4476 }, { "epoch": 1.22, "grad_norm": 1.9867760162376893, "learning_rate": 6.707561654373436e-06, "loss": 0.1283, "step": 4477 }, { "epoch": 1.22, "grad_norm": 2.143567532233534, "learning_rate": 6.706176501136914e-06, "loss": 0.1278, "step": 4478 }, { "epoch": 1.22, "grad_norm": 2.0813115829365687, "learning_rate": 6.704791199685865e-06, "loss": 0.1047, "step": 4479 }, { "epoch": 1.22, "grad_norm": 1.975664765552879, "learning_rate": 6.703405750140627e-06, "loss": 0.1352, "step": 4480 }, { "epoch": 1.22, "grad_norm": 2.1127830997797807, "learning_rate": 6.702020152621557e-06, "loss": 0.1306, "step": 4481 }, { "epoch": 1.22, "grad_norm": 2.0027344537696314, "learning_rate": 6.700634407249017e-06, "loss": 0.1216, "step": 4482 }, { "epoch": 1.22, "grad_norm": 1.974900933035852, "learning_rate": 6.699248514143388e-06, "loss": 0.1139, "step": 4483 }, { "epoch": 1.22, "grad_norm": 1.9723876288659472, "learning_rate": 6.697862473425063e-06, "loss": 0.1247, "step": 4484 }, { "epoch": 1.22, "grad_norm": 1.9187521069188633, "learning_rate": 6.696476285214444e-06, "loss": 0.105, "step": 4485 }, { "epoch": 1.22, "grad_norm": 2.0557338544531527, "learning_rate": 6.695089949631949e-06, "loss": 0.1279, "step": 4486 }, { "epoch": 1.22, "grad_norm": 2.102788926394792, "learning_rate": 6.69370346679801e-06, "loss": 0.1359, "step": 4487 }, { "epoch": 1.23, "grad_norm": 1.7734037695512, "learning_rate": 6.692316836833066e-06, "loss": 0.098, "step": 4488 }, { "epoch": 1.23, "grad_norm": 2.0504134621274384, "learning_rate": 6.6909300598575764e-06, "loss": 0.1096, "step": 4489 }, { "epoch": 1.23, "grad_norm": 2.3353046327590627, "learning_rate": 6.689543135992009e-06, "loss": 0.1435, "step": 4490 }, { "epoch": 1.23, "grad_norm": 2.065793523917047, "learning_rate": 6.688156065356845e-06, "loss": 0.1428, "step": 4491 }, { "epoch": 1.23, "grad_norm": 2.037792243595762, "learning_rate": 6.686768848072576e-06, "loss": 0.1283, "step": 4492 }, { "epoch": 1.23, "grad_norm": 1.868527858804207, "learning_rate": 6.685381484259712e-06, "loss": 0.1081, "step": 4493 }, { "epoch": 1.23, "grad_norm": 2.1683738640578665, "learning_rate": 6.683993974038771e-06, "loss": 0.1137, "step": 4494 }, { "epoch": 1.23, "grad_norm": 1.8816017107639569, "learning_rate": 6.682606317530284e-06, "loss": 0.1046, "step": 4495 }, { "epoch": 1.23, "grad_norm": 1.86408222955228, "learning_rate": 6.681218514854799e-06, "loss": 0.1262, "step": 4496 }, { "epoch": 1.23, "grad_norm": 1.9696894935007574, "learning_rate": 6.67983056613287e-06, "loss": 0.1261, "step": 4497 }, { "epoch": 1.23, "grad_norm": 1.9056129615210844, "learning_rate": 6.678442471485069e-06, "loss": 0.1296, "step": 4498 }, { "epoch": 1.23, "grad_norm": 2.229792516457032, "learning_rate": 6.677054231031981e-06, "loss": 0.1499, "step": 4499 }, { "epoch": 1.23, "grad_norm": 2.3889220975976624, "learning_rate": 6.675665844894197e-06, "loss": 0.1388, "step": 4500 }, { "epoch": 1.23, "grad_norm": 2.1118565572385384, "learning_rate": 6.674277313192329e-06, "loss": 0.1547, "step": 4501 }, { "epoch": 1.23, "grad_norm": 1.9767194486206585, "learning_rate": 6.672888636046997e-06, "loss": 0.128, "step": 4502 }, { "epoch": 1.23, "grad_norm": 2.1792831450720285, "learning_rate": 6.671499813578835e-06, "loss": 0.1241, "step": 4503 }, { "epoch": 1.23, "grad_norm": 1.8366236456361555, "learning_rate": 6.670110845908486e-06, "loss": 0.1187, "step": 4504 }, { "epoch": 1.23, "grad_norm": 2.090273861052521, "learning_rate": 6.668721733156613e-06, "loss": 0.1343, "step": 4505 }, { "epoch": 1.23, "grad_norm": 2.4073599047775707, "learning_rate": 6.667332475443885e-06, "loss": 0.1295, "step": 4506 }, { "epoch": 1.23, "grad_norm": 1.8723641935867583, "learning_rate": 6.665943072890987e-06, "loss": 0.1191, "step": 4507 }, { "epoch": 1.23, "grad_norm": 2.003749361870532, "learning_rate": 6.664553525618616e-06, "loss": 0.1225, "step": 4508 }, { "epoch": 1.23, "grad_norm": 1.8798065665897452, "learning_rate": 6.663163833747479e-06, "loss": 0.1135, "step": 4509 }, { "epoch": 1.23, "grad_norm": 1.6103720882798456, "learning_rate": 6.6617739973982985e-06, "loss": 0.1009, "step": 4510 }, { "epoch": 1.23, "grad_norm": 1.8036595410315, "learning_rate": 6.660384016691811e-06, "loss": 0.1154, "step": 4511 }, { "epoch": 1.23, "grad_norm": 1.624544364109499, "learning_rate": 6.65899389174876e-06, "loss": 0.0888, "step": 4512 }, { "epoch": 1.23, "grad_norm": 2.0055171201382787, "learning_rate": 6.657603622689908e-06, "loss": 0.1265, "step": 4513 }, { "epoch": 1.23, "grad_norm": 2.0213285277094744, "learning_rate": 6.656213209636024e-06, "loss": 0.1322, "step": 4514 }, { "epoch": 1.23, "grad_norm": 1.8428122938614997, "learning_rate": 6.654822652707893e-06, "loss": 0.1211, "step": 4515 }, { "epoch": 1.23, "grad_norm": 1.9360818066329497, "learning_rate": 6.6534319520263135e-06, "loss": 0.1045, "step": 4516 }, { "epoch": 1.23, "grad_norm": 2.1159799028108504, "learning_rate": 6.652041107712094e-06, "loss": 0.1353, "step": 4517 }, { "epoch": 1.23, "grad_norm": 2.1977305158576805, "learning_rate": 6.6506501198860555e-06, "loss": 0.1369, "step": 4518 }, { "epoch": 1.23, "grad_norm": 1.901432541264185, "learning_rate": 6.649258988669031e-06, "loss": 0.0861, "step": 4519 }, { "epoch": 1.23, "grad_norm": 1.8742972212180524, "learning_rate": 6.647867714181872e-06, "loss": 0.1152, "step": 4520 }, { "epoch": 1.23, "grad_norm": 2.0367128854754273, "learning_rate": 6.646476296545434e-06, "loss": 0.1428, "step": 4521 }, { "epoch": 1.23, "grad_norm": 2.1563396993556196, "learning_rate": 6.645084735880589e-06, "loss": 0.13, "step": 4522 }, { "epoch": 1.23, "grad_norm": 1.915582650070517, "learning_rate": 6.6436930323082215e-06, "loss": 0.119, "step": 4523 }, { "epoch": 1.24, "grad_norm": 2.1115991832060654, "learning_rate": 6.642301185949227e-06, "loss": 0.1391, "step": 4524 }, { "epoch": 1.24, "grad_norm": 2.036518481559962, "learning_rate": 6.640909196924516e-06, "loss": 0.1245, "step": 4525 }, { "epoch": 1.24, "grad_norm": 2.0602933131633576, "learning_rate": 6.6395170653550085e-06, "loss": 0.1276, "step": 4526 }, { "epoch": 1.24, "grad_norm": 2.0035515946775195, "learning_rate": 6.63812479136164e-06, "loss": 0.1112, "step": 4527 }, { "epoch": 1.24, "grad_norm": 1.8421903646175244, "learning_rate": 6.636732375065353e-06, "loss": 0.1179, "step": 4528 }, { "epoch": 1.24, "grad_norm": 1.9315176364317235, "learning_rate": 6.635339816587109e-06, "loss": 0.129, "step": 4529 }, { "epoch": 1.24, "grad_norm": 1.996951127197386, "learning_rate": 6.633947116047877e-06, "loss": 0.118, "step": 4530 }, { "epoch": 1.24, "grad_norm": 1.9967870165783594, "learning_rate": 6.632554273568641e-06, "loss": 0.0982, "step": 4531 }, { "epoch": 1.24, "grad_norm": 1.8242548129565206, "learning_rate": 6.631161289270398e-06, "loss": 0.1074, "step": 4532 }, { "epoch": 1.24, "grad_norm": 2.0970688283744847, "learning_rate": 6.629768163274152e-06, "loss": 0.1374, "step": 4533 }, { "epoch": 1.24, "grad_norm": 2.02810614464841, "learning_rate": 6.628374895700924e-06, "loss": 0.1275, "step": 4534 }, { "epoch": 1.24, "grad_norm": 1.869279469859292, "learning_rate": 6.626981486671748e-06, "loss": 0.1106, "step": 4535 }, { "epoch": 1.24, "grad_norm": 1.7735494316284937, "learning_rate": 6.6255879363076695e-06, "loss": 0.1239, "step": 4536 }, { "epoch": 1.24, "grad_norm": 2.2952517902463923, "learning_rate": 6.62419424472974e-06, "loss": 0.1486, "step": 4537 }, { "epoch": 1.24, "grad_norm": 1.7188167631773674, "learning_rate": 6.622800412059036e-06, "loss": 0.1226, "step": 4538 }, { "epoch": 1.24, "grad_norm": 1.7662766698023447, "learning_rate": 6.621406438416633e-06, "loss": 0.1268, "step": 4539 }, { "epoch": 1.24, "grad_norm": 2.0916233145139174, "learning_rate": 6.620012323923628e-06, "loss": 0.1091, "step": 4540 }, { "epoch": 1.24, "grad_norm": 1.7434188277919636, "learning_rate": 6.618618068701126e-06, "loss": 0.1212, "step": 4541 }, { "epoch": 1.24, "grad_norm": 1.7071286717899237, "learning_rate": 6.617223672870244e-06, "loss": 0.0924, "step": 4542 }, { "epoch": 1.24, "grad_norm": 1.7465804771266527, "learning_rate": 6.615829136552112e-06, "loss": 0.0933, "step": 4543 }, { "epoch": 1.24, "grad_norm": 1.9147516866932275, "learning_rate": 6.614434459867875e-06, "loss": 0.1012, "step": 4544 }, { "epoch": 1.24, "grad_norm": 1.7580181575377933, "learning_rate": 6.613039642938687e-06, "loss": 0.1102, "step": 4545 }, { "epoch": 1.24, "grad_norm": 1.923802986248132, "learning_rate": 6.611644685885713e-06, "loss": 0.124, "step": 4546 }, { "epoch": 1.24, "grad_norm": 1.9493312657273785, "learning_rate": 6.610249588830135e-06, "loss": 0.1191, "step": 4547 }, { "epoch": 1.24, "grad_norm": 2.234923398815059, "learning_rate": 6.60885435189314e-06, "loss": 0.1354, "step": 4548 }, { "epoch": 1.24, "grad_norm": 2.032789365143602, "learning_rate": 6.607458975195937e-06, "loss": 0.1594, "step": 4549 }, { "epoch": 1.24, "grad_norm": 1.9970849235932349, "learning_rate": 6.606063458859737e-06, "loss": 0.127, "step": 4550 }, { "epoch": 1.24, "grad_norm": 1.8447412347452457, "learning_rate": 6.60466780300577e-06, "loss": 0.1176, "step": 4551 }, { "epoch": 1.24, "grad_norm": 1.7550955490696036, "learning_rate": 6.6032720077552744e-06, "loss": 0.1224, "step": 4552 }, { "epoch": 1.24, "grad_norm": 2.3561969956422666, "learning_rate": 6.601876073229504e-06, "loss": 0.145, "step": 4553 }, { "epoch": 1.24, "grad_norm": 1.6786349819561515, "learning_rate": 6.600479999549721e-06, "loss": 0.096, "step": 4554 }, { "epoch": 1.24, "grad_norm": 2.0985536530743962, "learning_rate": 6.599083786837202e-06, "loss": 0.1428, "step": 4555 }, { "epoch": 1.24, "grad_norm": 1.79791758134227, "learning_rate": 6.5976874352132336e-06, "loss": 0.1205, "step": 4556 }, { "epoch": 1.24, "grad_norm": 2.1033145094264425, "learning_rate": 6.59629094479912e-06, "loss": 0.1389, "step": 4557 }, { "epoch": 1.24, "grad_norm": 1.9152687153157175, "learning_rate": 6.59489431571617e-06, "loss": 0.1227, "step": 4558 }, { "epoch": 1.24, "grad_norm": 2.1568646318938414, "learning_rate": 6.593497548085709e-06, "loss": 0.1279, "step": 4559 }, { "epoch": 1.24, "grad_norm": 1.9995263318737113, "learning_rate": 6.592100642029073e-06, "loss": 0.1282, "step": 4560 }, { "epoch": 1.25, "grad_norm": 2.0037478086532774, "learning_rate": 6.5907035976676116e-06, "loss": 0.1138, "step": 4561 }, { "epoch": 1.25, "grad_norm": 2.0818954728418078, "learning_rate": 6.589306415122684e-06, "loss": 0.1229, "step": 4562 }, { "epoch": 1.25, "grad_norm": 2.2580158878139285, "learning_rate": 6.587909094515663e-06, "loss": 0.1625, "step": 4563 }, { "epoch": 1.25, "grad_norm": 1.9823576851391977, "learning_rate": 6.586511635967934e-06, "loss": 0.1402, "step": 4564 }, { "epoch": 1.25, "grad_norm": 1.7923703203378352, "learning_rate": 6.585114039600891e-06, "loss": 0.114, "step": 4565 }, { "epoch": 1.25, "grad_norm": 1.6243933220789477, "learning_rate": 6.5837163055359435e-06, "loss": 0.1055, "step": 4566 }, { "epoch": 1.25, "grad_norm": 1.8643331194757933, "learning_rate": 6.582318433894513e-06, "loss": 0.1135, "step": 4567 }, { "epoch": 1.25, "grad_norm": 1.7890743283091954, "learning_rate": 6.580920424798031e-06, "loss": 0.1023, "step": 4568 }, { "epoch": 1.25, "grad_norm": 1.9794758132092567, "learning_rate": 6.57952227836794e-06, "loss": 0.1137, "step": 4569 }, { "epoch": 1.25, "grad_norm": 1.5970968659901188, "learning_rate": 6.578123994725699e-06, "loss": 0.0918, "step": 4570 }, { "epoch": 1.25, "grad_norm": 2.027923918355746, "learning_rate": 6.576725573992775e-06, "loss": 0.1291, "step": 4571 }, { "epoch": 1.25, "grad_norm": 2.2924311653420846, "learning_rate": 6.575327016290647e-06, "loss": 0.1517, "step": 4572 }, { "epoch": 1.25, "grad_norm": 1.8996138273242262, "learning_rate": 6.573928321740808e-06, "loss": 0.127, "step": 4573 }, { "epoch": 1.25, "grad_norm": 1.6567208691976831, "learning_rate": 6.57252949046476e-06, "loss": 0.0858, "step": 4574 }, { "epoch": 1.25, "grad_norm": 2.144612260631249, "learning_rate": 6.571130522584022e-06, "loss": 0.1178, "step": 4575 }, { "epoch": 1.25, "grad_norm": 2.208753456934384, "learning_rate": 6.569731418220119e-06, "loss": 0.1369, "step": 4576 }, { "epoch": 1.25, "grad_norm": 1.8510551458589826, "learning_rate": 6.56833217749459e-06, "loss": 0.1025, "step": 4577 }, { "epoch": 1.25, "grad_norm": 2.587685679646334, "learning_rate": 6.566932800528987e-06, "loss": 0.1519, "step": 4578 }, { "epoch": 1.25, "grad_norm": 2.0540714070637924, "learning_rate": 6.565533287444874e-06, "loss": 0.1373, "step": 4579 }, { "epoch": 1.25, "grad_norm": 2.0071598776575894, "learning_rate": 6.564133638363823e-06, "loss": 0.1216, "step": 4580 }, { "epoch": 1.25, "grad_norm": 1.7131240886252699, "learning_rate": 6.5627338534074234e-06, "loss": 0.0983, "step": 4581 }, { "epoch": 1.25, "grad_norm": 1.844502087994929, "learning_rate": 6.561333932697275e-06, "loss": 0.1172, "step": 4582 }, { "epoch": 1.25, "grad_norm": 1.925692826679753, "learning_rate": 6.559933876354983e-06, "loss": 0.1126, "step": 4583 }, { "epoch": 1.25, "grad_norm": 2.578651996034083, "learning_rate": 6.558533684502174e-06, "loss": 0.1335, "step": 4584 }, { "epoch": 1.25, "grad_norm": 1.6361737641457619, "learning_rate": 6.557133357260481e-06, "loss": 0.101, "step": 4585 }, { "epoch": 1.25, "grad_norm": 1.8477842780521918, "learning_rate": 6.555732894751548e-06, "loss": 0.1214, "step": 4586 }, { "epoch": 1.25, "grad_norm": 1.9844077766006776, "learning_rate": 6.554332297097032e-06, "loss": 0.1228, "step": 4587 }, { "epoch": 1.25, "grad_norm": 2.059357208925206, "learning_rate": 6.552931564418605e-06, "loss": 0.1271, "step": 4588 }, { "epoch": 1.25, "grad_norm": 2.2640283374933885, "learning_rate": 6.5515306968379445e-06, "loss": 0.1365, "step": 4589 }, { "epoch": 1.25, "grad_norm": 2.0067816907276357, "learning_rate": 6.550129694476744e-06, "loss": 0.1265, "step": 4590 }, { "epoch": 1.25, "grad_norm": 1.7833592758340449, "learning_rate": 6.54872855745671e-06, "loss": 0.1057, "step": 4591 }, { "epoch": 1.25, "grad_norm": 2.1661855172576696, "learning_rate": 6.547327285899556e-06, "loss": 0.1299, "step": 4592 }, { "epoch": 1.25, "grad_norm": 2.0258065636598945, "learning_rate": 6.54592587992701e-06, "loss": 0.1201, "step": 4593 }, { "epoch": 1.25, "grad_norm": 1.6926321408557277, "learning_rate": 6.544524339660813e-06, "loss": 0.0954, "step": 4594 }, { "epoch": 1.25, "grad_norm": 1.801013477938161, "learning_rate": 6.543122665222713e-06, "loss": 0.1124, "step": 4595 }, { "epoch": 1.25, "grad_norm": 1.9662375106303902, "learning_rate": 6.541720856734475e-06, "loss": 0.1244, "step": 4596 }, { "epoch": 1.25, "grad_norm": 1.8997344256893935, "learning_rate": 6.5403189143178725e-06, "loss": 0.1233, "step": 4597 }, { "epoch": 1.26, "grad_norm": 1.8352948642934723, "learning_rate": 6.538916838094691e-06, "loss": 0.1206, "step": 4598 }, { "epoch": 1.26, "grad_norm": 1.8571787063102723, "learning_rate": 6.537514628186727e-06, "loss": 0.1208, "step": 4599 }, { "epoch": 1.26, "grad_norm": 2.4023176474275263, "learning_rate": 6.536112284715795e-06, "loss": 0.154, "step": 4600 }, { "epoch": 1.26, "grad_norm": 1.82491652623926, "learning_rate": 6.534709807803707e-06, "loss": 0.1096, "step": 4601 }, { "epoch": 1.26, "grad_norm": 1.5756244569887157, "learning_rate": 6.533307197572302e-06, "loss": 0.0917, "step": 4602 }, { "epoch": 1.26, "grad_norm": 2.0933927333588933, "learning_rate": 6.5319044541434225e-06, "loss": 0.149, "step": 4603 }, { "epoch": 1.26, "grad_norm": 2.049884612563024, "learning_rate": 6.530501577638923e-06, "loss": 0.1348, "step": 4604 }, { "epoch": 1.26, "grad_norm": 1.797460742808321, "learning_rate": 6.529098568180672e-06, "loss": 0.111, "step": 4605 }, { "epoch": 1.26, "grad_norm": 1.8830847019792387, "learning_rate": 6.527695425890547e-06, "loss": 0.1214, "step": 4606 }, { "epoch": 1.26, "grad_norm": 1.8481337259277173, "learning_rate": 6.526292150890437e-06, "loss": 0.1107, "step": 4607 }, { "epoch": 1.26, "grad_norm": 1.8250724207809175, "learning_rate": 6.5248887433022446e-06, "loss": 0.1014, "step": 4608 }, { "epoch": 1.26, "grad_norm": 1.8573307488975646, "learning_rate": 6.523485203247886e-06, "loss": 0.1087, "step": 4609 }, { "epoch": 1.26, "grad_norm": 2.012869002747278, "learning_rate": 6.5220815308492805e-06, "loss": 0.1353, "step": 4610 }, { "epoch": 1.26, "grad_norm": 2.227262553418994, "learning_rate": 6.520677726228366e-06, "loss": 0.1319, "step": 4611 }, { "epoch": 1.26, "grad_norm": 1.9613893355761616, "learning_rate": 6.519273789507094e-06, "loss": 0.1158, "step": 4612 }, { "epoch": 1.26, "grad_norm": 2.0339892630473075, "learning_rate": 6.517869720807419e-06, "loss": 0.1298, "step": 4613 }, { "epoch": 1.26, "grad_norm": 2.1302049448954494, "learning_rate": 6.5164655202513135e-06, "loss": 0.127, "step": 4614 }, { "epoch": 1.26, "grad_norm": 2.0518336949199614, "learning_rate": 6.51506118796076e-06, "loss": 0.1316, "step": 4615 }, { "epoch": 1.26, "grad_norm": 1.7715262406117718, "learning_rate": 6.513656724057751e-06, "loss": 0.1121, "step": 4616 }, { "epoch": 1.26, "grad_norm": 1.6882796016180939, "learning_rate": 6.512252128664292e-06, "loss": 0.0977, "step": 4617 }, { "epoch": 1.26, "grad_norm": 1.997244188201705, "learning_rate": 6.510847401902398e-06, "loss": 0.1366, "step": 4618 }, { "epoch": 1.26, "grad_norm": 2.297821682832337, "learning_rate": 6.509442543894099e-06, "loss": 0.1607, "step": 4619 }, { "epoch": 1.26, "grad_norm": 1.7312851821001018, "learning_rate": 6.5080375547614325e-06, "loss": 0.1033, "step": 4620 }, { "epoch": 1.26, "grad_norm": 1.7679070426232457, "learning_rate": 6.50663243462645e-06, "loss": 0.1182, "step": 4621 }, { "epoch": 1.26, "grad_norm": 2.137841963950832, "learning_rate": 6.505227183611214e-06, "loss": 0.1176, "step": 4622 }, { "epoch": 1.26, "grad_norm": 1.4801080126343276, "learning_rate": 6.503821801837795e-06, "loss": 0.086, "step": 4623 }, { "epoch": 1.26, "grad_norm": 1.7965054491185959, "learning_rate": 6.502416289428282e-06, "loss": 0.1178, "step": 4624 }, { "epoch": 1.26, "grad_norm": 1.9400787329162716, "learning_rate": 6.501010646504766e-06, "loss": 0.1324, "step": 4625 }, { "epoch": 1.26, "grad_norm": 1.6871720333265288, "learning_rate": 6.499604873189358e-06, "loss": 0.1034, "step": 4626 }, { "epoch": 1.26, "grad_norm": 1.6252937350861658, "learning_rate": 6.498198969604177e-06, "loss": 0.0894, "step": 4627 }, { "epoch": 1.26, "grad_norm": 2.1259664135452265, "learning_rate": 6.49679293587135e-06, "loss": 0.1284, "step": 4628 }, { "epoch": 1.26, "grad_norm": 1.8299341192140002, "learning_rate": 6.495386772113019e-06, "loss": 0.103, "step": 4629 }, { "epoch": 1.26, "grad_norm": 2.0800849613870493, "learning_rate": 6.49398047845134e-06, "loss": 0.1331, "step": 4630 }, { "epoch": 1.26, "grad_norm": 1.7596640862474255, "learning_rate": 6.492574055008474e-06, "loss": 0.102, "step": 4631 }, { "epoch": 1.26, "grad_norm": 2.4210149601902162, "learning_rate": 6.491167501906596e-06, "loss": 0.1468, "step": 4632 }, { "epoch": 1.26, "grad_norm": 2.0448032667643963, "learning_rate": 6.489760819267893e-06, "loss": 0.1122, "step": 4633 }, { "epoch": 1.27, "grad_norm": 2.1356338883247203, "learning_rate": 6.488354007214562e-06, "loss": 0.1385, "step": 4634 }, { "epoch": 1.27, "grad_norm": 1.8896036858816418, "learning_rate": 6.486947065868814e-06, "loss": 0.1107, "step": 4635 }, { "epoch": 1.27, "grad_norm": 2.3322172941578887, "learning_rate": 6.4855399953528675e-06, "loss": 0.1362, "step": 4636 }, { "epoch": 1.27, "grad_norm": 2.0256378509758317, "learning_rate": 6.4841327957889535e-06, "loss": 0.1298, "step": 4637 }, { "epoch": 1.27, "grad_norm": 1.7836263212057055, "learning_rate": 6.482725467299316e-06, "loss": 0.1086, "step": 4638 }, { "epoch": 1.27, "grad_norm": 1.8548198808943297, "learning_rate": 6.481318010006208e-06, "loss": 0.1247, "step": 4639 }, { "epoch": 1.27, "grad_norm": 1.9205156491856794, "learning_rate": 6.479910424031893e-06, "loss": 0.1131, "step": 4640 }, { "epoch": 1.27, "grad_norm": 2.116143695498577, "learning_rate": 6.478502709498649e-06, "loss": 0.1285, "step": 4641 }, { "epoch": 1.27, "grad_norm": 1.784206771270871, "learning_rate": 6.477094866528764e-06, "loss": 0.1145, "step": 4642 }, { "epoch": 1.27, "grad_norm": 2.085787707914258, "learning_rate": 6.475686895244534e-06, "loss": 0.1342, "step": 4643 }, { "epoch": 1.27, "grad_norm": 2.009961567625247, "learning_rate": 6.474278795768272e-06, "loss": 0.1424, "step": 4644 }, { "epoch": 1.27, "grad_norm": 1.9924155504243144, "learning_rate": 6.472870568222295e-06, "loss": 0.1214, "step": 4645 }, { "epoch": 1.27, "grad_norm": 1.8415606436676872, "learning_rate": 6.471462212728936e-06, "loss": 0.1104, "step": 4646 }, { "epoch": 1.27, "grad_norm": 1.7942278663976636, "learning_rate": 6.470053729410541e-06, "loss": 0.1049, "step": 4647 }, { "epoch": 1.27, "grad_norm": 1.827009715074889, "learning_rate": 6.4686451183894604e-06, "loss": 0.1176, "step": 4648 }, { "epoch": 1.27, "grad_norm": 2.0335119483914674, "learning_rate": 6.467236379788061e-06, "loss": 0.1292, "step": 4649 }, { "epoch": 1.27, "grad_norm": 1.9735863372655535, "learning_rate": 6.4658275137287196e-06, "loss": 0.1361, "step": 4650 }, { "epoch": 1.27, "grad_norm": 2.2233048808284557, "learning_rate": 6.464418520333821e-06, "loss": 0.1296, "step": 4651 }, { "epoch": 1.27, "grad_norm": 1.7564361418175638, "learning_rate": 6.463009399725767e-06, "loss": 0.1313, "step": 4652 }, { "epoch": 1.27, "grad_norm": 1.7767552095739203, "learning_rate": 6.461600152026966e-06, "loss": 0.1128, "step": 4653 }, { "epoch": 1.27, "grad_norm": 1.7891359617897904, "learning_rate": 6.460190777359836e-06, "loss": 0.1091, "step": 4654 }, { "epoch": 1.27, "grad_norm": 1.938453573793179, "learning_rate": 6.458781275846811e-06, "loss": 0.1109, "step": 4655 }, { "epoch": 1.27, "grad_norm": 1.565924447426406, "learning_rate": 6.457371647610334e-06, "loss": 0.0818, "step": 4656 }, { "epoch": 1.27, "grad_norm": 1.8599144577576698, "learning_rate": 6.455961892772857e-06, "loss": 0.1115, "step": 4657 }, { "epoch": 1.27, "grad_norm": 2.0841588304314236, "learning_rate": 6.454552011456845e-06, "loss": 0.1304, "step": 4658 }, { "epoch": 1.27, "grad_norm": 6.138594330201214, "learning_rate": 6.453142003784774e-06, "loss": 0.1145, "step": 4659 }, { "epoch": 1.27, "grad_norm": 2.1280666802232138, "learning_rate": 6.4517318698791294e-06, "loss": 0.1192, "step": 4660 }, { "epoch": 1.27, "grad_norm": 1.9899606842328335, "learning_rate": 6.45032160986241e-06, "loss": 0.1521, "step": 4661 }, { "epoch": 1.27, "grad_norm": 2.0137320504038945, "learning_rate": 6.448911223857124e-06, "loss": 0.1295, "step": 4662 }, { "epoch": 1.27, "grad_norm": 1.8831779477340354, "learning_rate": 6.44750071198579e-06, "loss": 0.1256, "step": 4663 }, { "epoch": 1.27, "grad_norm": 1.9674529933839957, "learning_rate": 6.446090074370939e-06, "loss": 0.1103, "step": 4664 }, { "epoch": 1.27, "grad_norm": 1.8373291262761091, "learning_rate": 6.444679311135112e-06, "loss": 0.1072, "step": 4665 }, { "epoch": 1.27, "grad_norm": 2.147294083901844, "learning_rate": 6.4432684224008615e-06, "loss": 0.1283, "step": 4666 }, { "epoch": 1.27, "grad_norm": 2.1431550886958326, "learning_rate": 6.441857408290751e-06, "loss": 0.1445, "step": 4667 }, { "epoch": 1.27, "grad_norm": 2.019178412078036, "learning_rate": 6.440446268927352e-06, "loss": 0.1403, "step": 4668 }, { "epoch": 1.27, "grad_norm": 1.7512739075778292, "learning_rate": 6.4390350044332514e-06, "loss": 0.1172, "step": 4669 }, { "epoch": 1.27, "grad_norm": 1.8060631994893221, "learning_rate": 6.437623614931045e-06, "loss": 0.0916, "step": 4670 }, { "epoch": 1.28, "grad_norm": 1.9848273389112192, "learning_rate": 6.43621210054334e-06, "loss": 0.1239, "step": 4671 }, { "epoch": 1.28, "grad_norm": 2.0222309183780234, "learning_rate": 6.434800461392752e-06, "loss": 0.1437, "step": 4672 }, { "epoch": 1.28, "grad_norm": 1.9642637158232656, "learning_rate": 6.4333886976019085e-06, "loss": 0.1115, "step": 4673 }, { "epoch": 1.28, "grad_norm": 1.8312092685760508, "learning_rate": 6.431976809293452e-06, "loss": 0.1178, "step": 4674 }, { "epoch": 1.28, "grad_norm": 2.2139286211226783, "learning_rate": 6.430564796590028e-06, "loss": 0.1209, "step": 4675 }, { "epoch": 1.28, "grad_norm": 1.9691512732112373, "learning_rate": 6.429152659614302e-06, "loss": 0.1089, "step": 4676 }, { "epoch": 1.28, "grad_norm": 2.0093226940421354, "learning_rate": 6.427740398488943e-06, "loss": 0.1198, "step": 4677 }, { "epoch": 1.28, "grad_norm": 1.8782675022294457, "learning_rate": 6.4263280133366326e-06, "loss": 0.1016, "step": 4678 }, { "epoch": 1.28, "grad_norm": 2.025372464707761, "learning_rate": 6.424915504280065e-06, "loss": 0.1221, "step": 4679 }, { "epoch": 1.28, "grad_norm": 1.8942523592540887, "learning_rate": 6.423502871441943e-06, "loss": 0.1047, "step": 4680 }, { "epoch": 1.28, "grad_norm": 1.8305540486447613, "learning_rate": 6.422090114944982e-06, "loss": 0.0999, "step": 4681 }, { "epoch": 1.28, "grad_norm": 1.9824460351947288, "learning_rate": 6.420677234911908e-06, "loss": 0.1134, "step": 4682 }, { "epoch": 1.28, "grad_norm": 1.980943268880249, "learning_rate": 6.4192642314654565e-06, "loss": 0.1017, "step": 4683 }, { "epoch": 1.28, "grad_norm": 2.3973803672232, "learning_rate": 6.417851104728372e-06, "loss": 0.1454, "step": 4684 }, { "epoch": 1.28, "grad_norm": 2.269742852905679, "learning_rate": 6.416437854823414e-06, "loss": 0.1454, "step": 4685 }, { "epoch": 1.28, "grad_norm": 1.8811875135857112, "learning_rate": 6.415024481873352e-06, "loss": 0.1041, "step": 4686 }, { "epoch": 1.28, "grad_norm": 1.678128253002755, "learning_rate": 6.413610986000963e-06, "loss": 0.0906, "step": 4687 }, { "epoch": 1.28, "grad_norm": 2.0292386810676106, "learning_rate": 6.412197367329036e-06, "loss": 0.1285, "step": 4688 }, { "epoch": 1.28, "grad_norm": 1.8265625715311626, "learning_rate": 6.4107836259803745e-06, "loss": 0.1115, "step": 4689 }, { "epoch": 1.28, "grad_norm": 1.7279577169374565, "learning_rate": 6.409369762077784e-06, "loss": 0.1151, "step": 4690 }, { "epoch": 1.28, "grad_norm": 1.8487289179792048, "learning_rate": 6.40795577574409e-06, "loss": 0.1145, "step": 4691 }, { "epoch": 1.28, "grad_norm": 2.058754207847507, "learning_rate": 6.406541667102126e-06, "loss": 0.1125, "step": 4692 }, { "epoch": 1.28, "grad_norm": 1.5655007882651693, "learning_rate": 6.40512743627473e-06, "loss": 0.0945, "step": 4693 }, { "epoch": 1.28, "grad_norm": 2.291245959863372, "learning_rate": 6.403713083384758e-06, "loss": 0.1495, "step": 4694 }, { "epoch": 1.28, "grad_norm": 1.9898321885396917, "learning_rate": 6.402298608555076e-06, "loss": 0.1242, "step": 4695 }, { "epoch": 1.28, "grad_norm": 2.450414734862128, "learning_rate": 6.4008840119085535e-06, "loss": 0.1332, "step": 4696 }, { "epoch": 1.28, "grad_norm": 1.8306260902591966, "learning_rate": 6.399469293568079e-06, "loss": 0.1152, "step": 4697 }, { "epoch": 1.28, "grad_norm": 1.845292472734369, "learning_rate": 6.398054453656549e-06, "loss": 0.1239, "step": 4698 }, { "epoch": 1.28, "grad_norm": 1.9228471501464572, "learning_rate": 6.396639492296868e-06, "loss": 0.1157, "step": 4699 }, { "epoch": 1.28, "grad_norm": 1.8702767947417749, "learning_rate": 6.3952244096119535e-06, "loss": 0.1188, "step": 4700 }, { "epoch": 1.28, "grad_norm": 1.8729537584239604, "learning_rate": 6.393809205724734e-06, "loss": 0.1147, "step": 4701 }, { "epoch": 1.28, "grad_norm": 1.8927800020065917, "learning_rate": 6.392393880758144e-06, "loss": 0.134, "step": 4702 }, { "epoch": 1.28, "grad_norm": 2.18543663183883, "learning_rate": 6.390978434835135e-06, "loss": 0.1329, "step": 4703 }, { "epoch": 1.28, "grad_norm": 2.109917707510275, "learning_rate": 6.389562868078666e-06, "loss": 0.1407, "step": 4704 }, { "epoch": 1.28, "grad_norm": 2.2004734083488935, "learning_rate": 6.388147180611705e-06, "loss": 0.1439, "step": 4705 }, { "epoch": 1.28, "grad_norm": 1.9186248989142887, "learning_rate": 6.386731372557231e-06, "loss": 0.124, "step": 4706 }, { "epoch": 1.29, "grad_norm": 2.292624624670308, "learning_rate": 6.385315444038238e-06, "loss": 0.1102, "step": 4707 }, { "epoch": 1.29, "grad_norm": 1.7955428450545798, "learning_rate": 6.383899395177724e-06, "loss": 0.0923, "step": 4708 }, { "epoch": 1.29, "grad_norm": 2.0387533499635215, "learning_rate": 6.3824832260987e-06, "loss": 0.1379, "step": 4709 }, { "epoch": 1.29, "grad_norm": 2.0665160561807894, "learning_rate": 6.381066936924189e-06, "loss": 0.129, "step": 4710 }, { "epoch": 1.29, "grad_norm": 1.7487152595087074, "learning_rate": 6.379650527777224e-06, "loss": 0.1035, "step": 4711 }, { "epoch": 1.29, "grad_norm": 1.7387690567688239, "learning_rate": 6.378233998780846e-06, "loss": 0.117, "step": 4712 }, { "epoch": 1.29, "grad_norm": 1.9632437273407148, "learning_rate": 6.376817350058109e-06, "loss": 0.1147, "step": 4713 }, { "epoch": 1.29, "grad_norm": 2.2039138139857766, "learning_rate": 6.375400581732076e-06, "loss": 0.1139, "step": 4714 }, { "epoch": 1.29, "grad_norm": 1.7624752146613907, "learning_rate": 6.373983693925819e-06, "loss": 0.103, "step": 4715 }, { "epoch": 1.29, "grad_norm": 2.2372372192109204, "learning_rate": 6.372566686762427e-06, "loss": 0.1421, "step": 4716 }, { "epoch": 1.29, "grad_norm": 1.672289964724045, "learning_rate": 6.37114956036499e-06, "loss": 0.0828, "step": 4717 }, { "epoch": 1.29, "grad_norm": 1.9898122915555132, "learning_rate": 6.369732314856614e-06, "loss": 0.1225, "step": 4718 }, { "epoch": 1.29, "grad_norm": 2.1464142311126366, "learning_rate": 6.368314950360416e-06, "loss": 0.1197, "step": 4719 }, { "epoch": 1.29, "grad_norm": 1.8841631657173143, "learning_rate": 6.366897466999519e-06, "loss": 0.1041, "step": 4720 }, { "epoch": 1.29, "grad_norm": 2.0668124989360934, "learning_rate": 6.3654798648970605e-06, "loss": 0.1429, "step": 4721 }, { "epoch": 1.29, "grad_norm": 1.917378582219527, "learning_rate": 6.364062144176188e-06, "loss": 0.1105, "step": 4722 }, { "epoch": 1.29, "grad_norm": 2.296647153848635, "learning_rate": 6.362644304960055e-06, "loss": 0.1374, "step": 4723 }, { "epoch": 1.29, "grad_norm": 1.9799489605382683, "learning_rate": 6.36122634737183e-06, "loss": 0.1369, "step": 4724 }, { "epoch": 1.29, "grad_norm": 1.888744617172784, "learning_rate": 6.359808271534691e-06, "loss": 0.1251, "step": 4725 }, { "epoch": 1.29, "grad_norm": 1.802733871180748, "learning_rate": 6.358390077571823e-06, "loss": 0.1019, "step": 4726 }, { "epoch": 1.29, "grad_norm": 1.7722024760365744, "learning_rate": 6.356971765606427e-06, "loss": 0.1051, "step": 4727 }, { "epoch": 1.29, "grad_norm": 1.94503672819843, "learning_rate": 6.355553335761708e-06, "loss": 0.1128, "step": 4728 }, { "epoch": 1.29, "grad_norm": 2.0860697685187395, "learning_rate": 6.354134788160885e-06, "loss": 0.1347, "step": 4729 }, { "epoch": 1.29, "grad_norm": 2.1449484116051085, "learning_rate": 6.352716122927187e-06, "loss": 0.1222, "step": 4730 }, { "epoch": 1.29, "grad_norm": 1.7781092502776699, "learning_rate": 6.351297340183852e-06, "loss": 0.1225, "step": 4731 }, { "epoch": 1.29, "grad_norm": 2.0266709716155114, "learning_rate": 6.349878440054129e-06, "loss": 0.1295, "step": 4732 }, { "epoch": 1.29, "grad_norm": 1.7432872652801654, "learning_rate": 6.348459422661276e-06, "loss": 0.1074, "step": 4733 }, { "epoch": 1.29, "grad_norm": 2.1246569000664084, "learning_rate": 6.3470402881285635e-06, "loss": 0.1077, "step": 4734 }, { "epoch": 1.29, "grad_norm": 1.9933213179922722, "learning_rate": 6.34562103657927e-06, "loss": 0.1159, "step": 4735 }, { "epoch": 1.29, "grad_norm": 1.79867290164734, "learning_rate": 6.344201668136687e-06, "loss": 0.1109, "step": 4736 }, { "epoch": 1.29, "grad_norm": 1.7053131110618118, "learning_rate": 6.342782182924112e-06, "loss": 0.0828, "step": 4737 }, { "epoch": 1.29, "grad_norm": 1.994551232335655, "learning_rate": 6.341362581064856e-06, "loss": 0.1387, "step": 4738 }, { "epoch": 1.29, "grad_norm": 1.9067712028932886, "learning_rate": 6.3399428626822375e-06, "loss": 0.1248, "step": 4739 }, { "epoch": 1.29, "grad_norm": 2.018007750684826, "learning_rate": 6.338523027899589e-06, "loss": 0.1453, "step": 4740 }, { "epoch": 1.29, "grad_norm": 2.0104198781409166, "learning_rate": 6.337103076840248e-06, "loss": 0.1077, "step": 4741 }, { "epoch": 1.29, "grad_norm": 1.7877783748911336, "learning_rate": 6.3356830096275666e-06, "loss": 0.0894, "step": 4742 }, { "epoch": 1.29, "grad_norm": 2.001070498243399, "learning_rate": 6.334262826384905e-06, "loss": 0.1181, "step": 4743 }, { "epoch": 1.3, "grad_norm": 1.8804212981654032, "learning_rate": 6.332842527235632e-06, "loss": 0.1142, "step": 4744 }, { "epoch": 1.3, "grad_norm": 1.8804941409848202, "learning_rate": 6.331422112303132e-06, "loss": 0.1202, "step": 4745 }, { "epoch": 1.3, "grad_norm": 1.9911130392424214, "learning_rate": 6.3300015817107895e-06, "loss": 0.1278, "step": 4746 }, { "epoch": 1.3, "grad_norm": 2.038769948738525, "learning_rate": 6.3285809355820106e-06, "loss": 0.1301, "step": 4747 }, { "epoch": 1.3, "grad_norm": 1.9623091732661166, "learning_rate": 6.327160174040205e-06, "loss": 0.1217, "step": 4748 }, { "epoch": 1.3, "grad_norm": 2.4064429967832086, "learning_rate": 6.32573929720879e-06, "loss": 0.1238, "step": 4749 }, { "epoch": 1.3, "grad_norm": 2.0140086885287403, "learning_rate": 6.324318305211201e-06, "loss": 0.1318, "step": 4750 }, { "epoch": 1.3, "grad_norm": 1.9705698499649666, "learning_rate": 6.3228971981708765e-06, "loss": 0.1336, "step": 4751 }, { "epoch": 1.3, "grad_norm": 1.9915688180462514, "learning_rate": 6.321475976211267e-06, "loss": 0.1177, "step": 4752 }, { "epoch": 1.3, "grad_norm": 2.0354044399723716, "learning_rate": 6.320054639455832e-06, "loss": 0.1178, "step": 4753 }, { "epoch": 1.3, "grad_norm": 2.0613693979582624, "learning_rate": 6.318633188028045e-06, "loss": 0.1377, "step": 4754 }, { "epoch": 1.3, "grad_norm": 2.411880674874074, "learning_rate": 6.317211622051384e-06, "loss": 0.1322, "step": 4755 }, { "epoch": 1.3, "grad_norm": 2.0086784414083096, "learning_rate": 6.315789941649341e-06, "loss": 0.126, "step": 4756 }, { "epoch": 1.3, "grad_norm": 1.9471495685372828, "learning_rate": 6.314368146945418e-06, "loss": 0.11, "step": 4757 }, { "epoch": 1.3, "grad_norm": 1.9444258779939045, "learning_rate": 6.312946238063121e-06, "loss": 0.114, "step": 4758 }, { "epoch": 1.3, "grad_norm": 2.119899727989978, "learning_rate": 6.311524215125975e-06, "loss": 0.1419, "step": 4759 }, { "epoch": 1.3, "grad_norm": 1.7902925628211697, "learning_rate": 6.310102078257508e-06, "loss": 0.1081, "step": 4760 }, { "epoch": 1.3, "grad_norm": 1.9553573925504344, "learning_rate": 6.30867982758126e-06, "loss": 0.1292, "step": 4761 }, { "epoch": 1.3, "grad_norm": 2.1461333123633977, "learning_rate": 6.307257463220782e-06, "loss": 0.1405, "step": 4762 }, { "epoch": 1.3, "grad_norm": 1.8098940965239325, "learning_rate": 6.3058349852996345e-06, "loss": 0.1253, "step": 4763 }, { "epoch": 1.3, "grad_norm": 1.5227095387454714, "learning_rate": 6.304412393941386e-06, "loss": 0.1, "step": 4764 }, { "epoch": 1.3, "grad_norm": 1.8452370328167844, "learning_rate": 6.3029896892696155e-06, "loss": 0.1132, "step": 4765 }, { "epoch": 1.3, "grad_norm": 1.659252963081831, "learning_rate": 6.301566871407915e-06, "loss": 0.1148, "step": 4766 }, { "epoch": 1.3, "grad_norm": 1.9225785943092828, "learning_rate": 6.300143940479881e-06, "loss": 0.0986, "step": 4767 }, { "epoch": 1.3, "grad_norm": 1.9244229679954448, "learning_rate": 6.298720896609125e-06, "loss": 0.1189, "step": 4768 }, { "epoch": 1.3, "grad_norm": 1.9990178513490748, "learning_rate": 6.297297739919266e-06, "loss": 0.1085, "step": 4769 }, { "epoch": 1.3, "grad_norm": 1.85466110080012, "learning_rate": 6.295874470533929e-06, "loss": 0.1252, "step": 4770 }, { "epoch": 1.3, "grad_norm": 1.8281269984137958, "learning_rate": 6.294451088576757e-06, "loss": 0.112, "step": 4771 }, { "epoch": 1.3, "grad_norm": 1.8657391080439982, "learning_rate": 6.293027594171397e-06, "loss": 0.1117, "step": 4772 }, { "epoch": 1.3, "grad_norm": 2.054295315157647, "learning_rate": 6.291603987441506e-06, "loss": 0.1119, "step": 4773 }, { "epoch": 1.3, "grad_norm": 2.038378911260525, "learning_rate": 6.290180268510753e-06, "loss": 0.1393, "step": 4774 }, { "epoch": 1.3, "grad_norm": 2.0465552647400203, "learning_rate": 6.288756437502816e-06, "loss": 0.1199, "step": 4775 }, { "epoch": 1.3, "grad_norm": 1.9839970112236058, "learning_rate": 6.28733249454138e-06, "loss": 0.1302, "step": 4776 }, { "epoch": 1.3, "grad_norm": 1.9735301545719488, "learning_rate": 6.2859084397501434e-06, "loss": 0.1121, "step": 4777 }, { "epoch": 1.3, "grad_norm": 1.933232469921969, "learning_rate": 6.2844842732528145e-06, "loss": 0.0981, "step": 4778 }, { "epoch": 1.3, "grad_norm": 1.9711930077916098, "learning_rate": 6.283059995173109e-06, "loss": 0.128, "step": 4779 }, { "epoch": 1.3, "grad_norm": 2.157327713639551, "learning_rate": 6.281635605634751e-06, "loss": 0.1427, "step": 4780 }, { "epoch": 1.31, "grad_norm": 2.0426310662453577, "learning_rate": 6.280211104761479e-06, "loss": 0.1234, "step": 4781 }, { "epoch": 1.31, "grad_norm": 1.9756964040824303, "learning_rate": 6.278786492677037e-06, "loss": 0.1317, "step": 4782 }, { "epoch": 1.31, "grad_norm": 2.001724170659894, "learning_rate": 6.2773617695051806e-06, "loss": 0.1135, "step": 4783 }, { "epoch": 1.31, "grad_norm": 1.9537172100205218, "learning_rate": 6.275936935369675e-06, "loss": 0.1278, "step": 4784 }, { "epoch": 1.31, "grad_norm": 1.7665908496400504, "learning_rate": 6.274511990394294e-06, "loss": 0.1046, "step": 4785 }, { "epoch": 1.31, "grad_norm": 1.9431417544789436, "learning_rate": 6.273086934702823e-06, "loss": 0.1194, "step": 4786 }, { "epoch": 1.31, "grad_norm": 2.3004261283855563, "learning_rate": 6.271661768419055e-06, "loss": 0.1539, "step": 4787 }, { "epoch": 1.31, "grad_norm": 1.9380146680491177, "learning_rate": 6.270236491666792e-06, "loss": 0.113, "step": 4788 }, { "epoch": 1.31, "grad_norm": 1.8238071960149504, "learning_rate": 6.268811104569849e-06, "loss": 0.1024, "step": 4789 }, { "epoch": 1.31, "grad_norm": 1.981209537223023, "learning_rate": 6.267385607252048e-06, "loss": 0.1099, "step": 4790 }, { "epoch": 1.31, "grad_norm": 1.9878156928560995, "learning_rate": 6.265959999837219e-06, "loss": 0.1154, "step": 4791 }, { "epoch": 1.31, "grad_norm": 2.0035518830800165, "learning_rate": 6.2645342824492065e-06, "loss": 0.1375, "step": 4792 }, { "epoch": 1.31, "grad_norm": 1.8018096634099765, "learning_rate": 6.263108455211862e-06, "loss": 0.1117, "step": 4793 }, { "epoch": 1.31, "grad_norm": 1.7187858370181939, "learning_rate": 6.261682518249043e-06, "loss": 0.116, "step": 4794 }, { "epoch": 1.31, "grad_norm": 1.8433209777807242, "learning_rate": 6.260256471684622e-06, "loss": 0.1284, "step": 4795 }, { "epoch": 1.31, "grad_norm": 1.9554113777640811, "learning_rate": 6.258830315642479e-06, "loss": 0.1068, "step": 4796 }, { "epoch": 1.31, "grad_norm": 1.9579278787057743, "learning_rate": 6.257404050246503e-06, "loss": 0.123, "step": 4797 }, { "epoch": 1.31, "grad_norm": 2.1057885769440947, "learning_rate": 6.255977675620592e-06, "loss": 0.1166, "step": 4798 }, { "epoch": 1.31, "grad_norm": 2.0729024591941334, "learning_rate": 6.254551191888656e-06, "loss": 0.119, "step": 4799 }, { "epoch": 1.31, "grad_norm": 2.0832789371539056, "learning_rate": 6.25312459917461e-06, "loss": 0.1202, "step": 4800 }, { "epoch": 1.31, "grad_norm": 1.8964979656182601, "learning_rate": 6.251697897602384e-06, "loss": 0.1182, "step": 4801 }, { "epoch": 1.31, "grad_norm": 1.8041336466073548, "learning_rate": 6.2502710872959134e-06, "loss": 0.092, "step": 4802 }, { "epoch": 1.31, "grad_norm": 1.9036793097133802, "learning_rate": 6.248844168379144e-06, "loss": 0.1271, "step": 4803 }, { "epoch": 1.31, "grad_norm": 1.9410457536194146, "learning_rate": 6.247417140976033e-06, "loss": 0.1282, "step": 4804 }, { "epoch": 1.31, "grad_norm": 1.9498557309340399, "learning_rate": 6.2459900052105445e-06, "loss": 0.1184, "step": 4805 }, { "epoch": 1.31, "grad_norm": 1.6732274802251281, "learning_rate": 6.2445627612066526e-06, "loss": 0.0999, "step": 4806 }, { "epoch": 1.31, "grad_norm": 1.8747749569817358, "learning_rate": 6.243135409088341e-06, "loss": 0.1139, "step": 4807 }, { "epoch": 1.31, "grad_norm": 2.1677434683595416, "learning_rate": 6.241707948979604e-06, "loss": 0.1349, "step": 4808 }, { "epoch": 1.31, "grad_norm": 1.8921749709926956, "learning_rate": 6.240280381004444e-06, "loss": 0.122, "step": 4809 }, { "epoch": 1.31, "grad_norm": 1.8943805397377445, "learning_rate": 6.23885270528687e-06, "loss": 0.1197, "step": 4810 }, { "epoch": 1.31, "grad_norm": 1.8493354871440684, "learning_rate": 6.237424921950909e-06, "loss": 0.1144, "step": 4811 }, { "epoch": 1.31, "grad_norm": 1.8953022008850646, "learning_rate": 6.235997031120585e-06, "loss": 0.1343, "step": 4812 }, { "epoch": 1.31, "grad_norm": 2.552142248735999, "learning_rate": 6.234569032919944e-06, "loss": 0.1161, "step": 4813 }, { "epoch": 1.31, "grad_norm": 1.790407437838741, "learning_rate": 6.233140927473033e-06, "loss": 0.1222, "step": 4814 }, { "epoch": 1.31, "grad_norm": 1.9373260027044, "learning_rate": 6.231712714903909e-06, "loss": 0.1214, "step": 4815 }, { "epoch": 1.31, "grad_norm": 2.0553997847416605, "learning_rate": 6.230284395336643e-06, "loss": 0.116, "step": 4816 }, { "epoch": 1.32, "grad_norm": 2.075450076080619, "learning_rate": 6.22885596889531e-06, "loss": 0.114, "step": 4817 }, { "epoch": 1.32, "grad_norm": 2.062329088847458, "learning_rate": 6.227427435703997e-06, "loss": 0.141, "step": 4818 }, { "epoch": 1.32, "grad_norm": 1.7435307307738757, "learning_rate": 6.2259987958868005e-06, "loss": 0.1136, "step": 4819 }, { "epoch": 1.32, "grad_norm": 1.8099285043582265, "learning_rate": 6.224570049567825e-06, "loss": 0.1131, "step": 4820 }, { "epoch": 1.32, "grad_norm": 1.840382332357282, "learning_rate": 6.223141196871185e-06, "loss": 0.1132, "step": 4821 }, { "epoch": 1.32, "grad_norm": 1.9753181904438506, "learning_rate": 6.221712237921005e-06, "loss": 0.1194, "step": 4822 }, { "epoch": 1.32, "grad_norm": 2.0296908058577587, "learning_rate": 6.220283172841416e-06, "loss": 0.1519, "step": 4823 }, { "epoch": 1.32, "grad_norm": 1.738773031226602, "learning_rate": 6.21885400175656e-06, "loss": 0.1072, "step": 4824 }, { "epoch": 1.32, "grad_norm": 1.9847561177958029, "learning_rate": 6.217424724790592e-06, "loss": 0.1317, "step": 4825 }, { "epoch": 1.32, "grad_norm": 2.1882594840092846, "learning_rate": 6.215995342067666e-06, "loss": 0.0978, "step": 4826 }, { "epoch": 1.32, "grad_norm": 1.7091302195872704, "learning_rate": 6.214565853711956e-06, "loss": 0.1073, "step": 4827 }, { "epoch": 1.32, "grad_norm": 1.758939439252944, "learning_rate": 6.213136259847642e-06, "loss": 0.1196, "step": 4828 }, { "epoch": 1.32, "grad_norm": 1.894513268226193, "learning_rate": 6.211706560598909e-06, "loss": 0.1165, "step": 4829 }, { "epoch": 1.32, "grad_norm": 1.7927417877245728, "learning_rate": 6.2102767560899545e-06, "loss": 0.1233, "step": 4830 }, { "epoch": 1.32, "grad_norm": 2.031101088555856, "learning_rate": 6.208846846444987e-06, "loss": 0.1084, "step": 4831 }, { "epoch": 1.32, "grad_norm": 1.9153611707796494, "learning_rate": 6.207416831788219e-06, "loss": 0.1313, "step": 4832 }, { "epoch": 1.32, "grad_norm": 1.8517018418779887, "learning_rate": 6.205986712243876e-06, "loss": 0.1248, "step": 4833 }, { "epoch": 1.32, "grad_norm": 1.951504780850715, "learning_rate": 6.204556487936193e-06, "loss": 0.1205, "step": 4834 }, { "epoch": 1.32, "grad_norm": 2.116722513445758, "learning_rate": 6.203126158989411e-06, "loss": 0.1615, "step": 4835 }, { "epoch": 1.32, "grad_norm": 1.9866247511889275, "learning_rate": 6.201695725527781e-06, "loss": 0.118, "step": 4836 }, { "epoch": 1.32, "grad_norm": 2.21921291709542, "learning_rate": 6.200265187675568e-06, "loss": 0.1273, "step": 4837 }, { "epoch": 1.32, "grad_norm": 1.8261340532859198, "learning_rate": 6.198834545557038e-06, "loss": 0.1003, "step": 4838 }, { "epoch": 1.32, "grad_norm": 1.8693238963561363, "learning_rate": 6.197403799296471e-06, "loss": 0.109, "step": 4839 }, { "epoch": 1.32, "grad_norm": 2.0368100655309034, "learning_rate": 6.195972949018157e-06, "loss": 0.1318, "step": 4840 }, { "epoch": 1.32, "grad_norm": 1.7440186239494408, "learning_rate": 6.194541994846388e-06, "loss": 0.0977, "step": 4841 }, { "epoch": 1.32, "grad_norm": 1.7798737169981045, "learning_rate": 6.193110936905476e-06, "loss": 0.1191, "step": 4842 }, { "epoch": 1.32, "grad_norm": 2.098212759074431, "learning_rate": 6.191679775319734e-06, "loss": 0.1341, "step": 4843 }, { "epoch": 1.32, "grad_norm": 1.8978721030925707, "learning_rate": 6.190248510213486e-06, "loss": 0.1049, "step": 4844 }, { "epoch": 1.32, "grad_norm": 1.8161119314906053, "learning_rate": 6.188817141711063e-06, "loss": 0.1189, "step": 4845 }, { "epoch": 1.32, "grad_norm": 2.30653690693518, "learning_rate": 6.1873856699368115e-06, "loss": 0.1209, "step": 4846 }, { "epoch": 1.32, "grad_norm": 1.8492552840920051, "learning_rate": 6.185954095015079e-06, "loss": 0.1105, "step": 4847 }, { "epoch": 1.32, "grad_norm": 2.017640080619557, "learning_rate": 6.184522417070227e-06, "loss": 0.1182, "step": 4848 }, { "epoch": 1.32, "grad_norm": 1.9889419835980953, "learning_rate": 6.183090636226625e-06, "loss": 0.1276, "step": 4849 }, { "epoch": 1.32, "grad_norm": 2.105367690109629, "learning_rate": 6.181658752608649e-06, "loss": 0.1302, "step": 4850 }, { "epoch": 1.32, "grad_norm": 1.8261736367652186, "learning_rate": 6.180226766340688e-06, "loss": 0.1231, "step": 4851 }, { "epoch": 1.32, "grad_norm": 1.5989238417814968, "learning_rate": 6.178794677547138e-06, "loss": 0.1101, "step": 4852 }, { "epoch": 1.32, "grad_norm": 2.0655123239058333, "learning_rate": 6.1773624863524e-06, "loss": 0.1307, "step": 4853 }, { "epoch": 1.33, "grad_norm": 1.7420354190303058, "learning_rate": 6.175930192880891e-06, "loss": 0.1234, "step": 4854 }, { "epoch": 1.33, "grad_norm": 1.983555966873605, "learning_rate": 6.174497797257034e-06, "loss": 0.1115, "step": 4855 }, { "epoch": 1.33, "grad_norm": 1.7439263386363626, "learning_rate": 6.173065299605257e-06, "loss": 0.1035, "step": 4856 }, { "epoch": 1.33, "grad_norm": 1.7460730836687877, "learning_rate": 6.171632700050003e-06, "loss": 0.1191, "step": 4857 }, { "epoch": 1.33, "grad_norm": 2.025897958102639, "learning_rate": 6.1701999987157225e-06, "loss": 0.1169, "step": 4858 }, { "epoch": 1.33, "grad_norm": 1.8515608098733858, "learning_rate": 6.168767195726868e-06, "loss": 0.1092, "step": 4859 }, { "epoch": 1.33, "grad_norm": 1.7902842933010585, "learning_rate": 6.16733429120791e-06, "loss": 0.1132, "step": 4860 }, { "epoch": 1.33, "grad_norm": 2.0402755488426525, "learning_rate": 6.165901285283326e-06, "loss": 0.1312, "step": 4861 }, { "epoch": 1.33, "grad_norm": 2.1572285788412575, "learning_rate": 6.164468178077595e-06, "loss": 0.1467, "step": 4862 }, { "epoch": 1.33, "grad_norm": 1.9344974762607847, "learning_rate": 6.163034969715214e-06, "loss": 0.1189, "step": 4863 }, { "epoch": 1.33, "grad_norm": 1.8207147522886569, "learning_rate": 6.161601660320684e-06, "loss": 0.1121, "step": 4864 }, { "epoch": 1.33, "grad_norm": 1.893640736028462, "learning_rate": 6.160168250018516e-06, "loss": 0.0965, "step": 4865 }, { "epoch": 1.33, "grad_norm": 1.9955242086047664, "learning_rate": 6.158734738933228e-06, "loss": 0.1195, "step": 4866 }, { "epoch": 1.33, "grad_norm": 2.1306176885166015, "learning_rate": 6.1573011271893515e-06, "loss": 0.1488, "step": 4867 }, { "epoch": 1.33, "grad_norm": 1.7892104762253915, "learning_rate": 6.15586741491142e-06, "loss": 0.1192, "step": 4868 }, { "epoch": 1.33, "grad_norm": 1.8917766805389953, "learning_rate": 6.154433602223979e-06, "loss": 0.1053, "step": 4869 }, { "epoch": 1.33, "grad_norm": 2.3611622194245716, "learning_rate": 6.152999689251588e-06, "loss": 0.1381, "step": 4870 }, { "epoch": 1.33, "grad_norm": 1.9887611929851323, "learning_rate": 6.151565676118805e-06, "loss": 0.1291, "step": 4871 }, { "epoch": 1.33, "grad_norm": 2.050931157823187, "learning_rate": 6.150131562950204e-06, "loss": 0.1221, "step": 4872 }, { "epoch": 1.33, "grad_norm": 1.9188897349699203, "learning_rate": 6.148697349870364e-06, "loss": 0.1193, "step": 4873 }, { "epoch": 1.33, "grad_norm": 1.9004784309760325, "learning_rate": 6.147263037003877e-06, "loss": 0.1163, "step": 4874 }, { "epoch": 1.33, "grad_norm": 1.93423289184628, "learning_rate": 6.145828624475337e-06, "loss": 0.1231, "step": 4875 }, { "epoch": 1.33, "grad_norm": 1.7730809587308083, "learning_rate": 6.144394112409356e-06, "loss": 0.1027, "step": 4876 }, { "epoch": 1.33, "grad_norm": 1.7369376687779499, "learning_rate": 6.142959500930543e-06, "loss": 0.0981, "step": 4877 }, { "epoch": 1.33, "grad_norm": 1.7293111687054623, "learning_rate": 6.1415247901635256e-06, "loss": 0.1066, "step": 4878 }, { "epoch": 1.33, "grad_norm": 1.9816602644444095, "learning_rate": 6.140089980232937e-06, "loss": 0.1314, "step": 4879 }, { "epoch": 1.33, "grad_norm": 1.9002630686190995, "learning_rate": 6.138655071263415e-06, "loss": 0.1305, "step": 4880 }, { "epoch": 1.33, "grad_norm": 2.0219745755450025, "learning_rate": 6.137220063379612e-06, "loss": 0.1052, "step": 4881 }, { "epoch": 1.33, "grad_norm": 2.019427212779509, "learning_rate": 6.135784956706186e-06, "loss": 0.1143, "step": 4882 }, { "epoch": 1.33, "grad_norm": 1.9769469324163873, "learning_rate": 6.134349751367802e-06, "loss": 0.1159, "step": 4883 }, { "epoch": 1.33, "grad_norm": 1.913974403284735, "learning_rate": 6.132914447489137e-06, "loss": 0.1263, "step": 4884 }, { "epoch": 1.33, "grad_norm": 2.288616969391857, "learning_rate": 6.131479045194875e-06, "loss": 0.1702, "step": 4885 }, { "epoch": 1.33, "grad_norm": 1.9323919256215265, "learning_rate": 6.130043544609707e-06, "loss": 0.1388, "step": 4886 }, { "epoch": 1.33, "grad_norm": 1.809332127661882, "learning_rate": 6.128607945858336e-06, "loss": 0.1194, "step": 4887 }, { "epoch": 1.33, "grad_norm": 2.342999704043993, "learning_rate": 6.127172249065471e-06, "loss": 0.1403, "step": 4888 }, { "epoch": 1.33, "grad_norm": 1.9773235167416923, "learning_rate": 6.125736454355831e-06, "loss": 0.1338, "step": 4889 }, { "epoch": 1.33, "grad_norm": 1.7618945982640297, "learning_rate": 6.124300561854139e-06, "loss": 0.1021, "step": 4890 }, { "epoch": 1.34, "grad_norm": 2.2018598817268393, "learning_rate": 6.122864571685135e-06, "loss": 0.1218, "step": 4891 }, { "epoch": 1.34, "grad_norm": 1.8836135751638283, "learning_rate": 6.121428483973559e-06, "loss": 0.1295, "step": 4892 }, { "epoch": 1.34, "grad_norm": 1.8837658093597585, "learning_rate": 6.119992298844165e-06, "loss": 0.106, "step": 4893 }, { "epoch": 1.34, "grad_norm": 1.8272297312477663, "learning_rate": 6.118556016421713e-06, "loss": 0.1193, "step": 4894 }, { "epoch": 1.34, "grad_norm": 1.9135369806054934, "learning_rate": 6.117119636830971e-06, "loss": 0.1196, "step": 4895 }, { "epoch": 1.34, "grad_norm": 2.3119131342734995, "learning_rate": 6.115683160196718e-06, "loss": 0.1203, "step": 4896 }, { "epoch": 1.34, "grad_norm": 1.8277687611017608, "learning_rate": 6.114246586643739e-06, "loss": 0.0985, "step": 4897 }, { "epoch": 1.34, "grad_norm": 2.1388876005771267, "learning_rate": 6.112809916296829e-06, "loss": 0.1439, "step": 4898 }, { "epoch": 1.34, "grad_norm": 1.589459885541316, "learning_rate": 6.11137314928079e-06, "loss": 0.0947, "step": 4899 }, { "epoch": 1.34, "grad_norm": 1.7601580968530142, "learning_rate": 6.109936285720433e-06, "loss": 0.1023, "step": 4900 }, { "epoch": 1.34, "grad_norm": 1.922006007107989, "learning_rate": 6.108499325740577e-06, "loss": 0.0975, "step": 4901 }, { "epoch": 1.34, "grad_norm": 1.7838705097233027, "learning_rate": 6.107062269466052e-06, "loss": 0.1202, "step": 4902 }, { "epoch": 1.34, "grad_norm": 1.947670472120174, "learning_rate": 6.105625117021692e-06, "loss": 0.121, "step": 4903 }, { "epoch": 1.34, "grad_norm": 1.9439048271487138, "learning_rate": 6.104187868532341e-06, "loss": 0.147, "step": 4904 }, { "epoch": 1.34, "grad_norm": 1.746878444271801, "learning_rate": 6.102750524122856e-06, "loss": 0.1122, "step": 4905 }, { "epoch": 1.34, "grad_norm": 1.9094150992437706, "learning_rate": 6.1013130839180936e-06, "loss": 0.1154, "step": 4906 }, { "epoch": 1.34, "grad_norm": 2.018840239067516, "learning_rate": 6.099875548042925e-06, "loss": 0.1289, "step": 4907 }, { "epoch": 1.34, "grad_norm": 2.145174511066835, "learning_rate": 6.098437916622231e-06, "loss": 0.1138, "step": 4908 }, { "epoch": 1.34, "grad_norm": 2.1275968938083847, "learning_rate": 6.097000189780893e-06, "loss": 0.1281, "step": 4909 }, { "epoch": 1.34, "grad_norm": 2.111484999647587, "learning_rate": 6.095562367643807e-06, "loss": 0.1204, "step": 4910 }, { "epoch": 1.34, "grad_norm": 1.9052044936938572, "learning_rate": 6.0941244503358776e-06, "loss": 0.1056, "step": 4911 }, { "epoch": 1.34, "grad_norm": 1.9951598828586277, "learning_rate": 6.0926864379820135e-06, "loss": 0.1433, "step": 4912 }, { "epoch": 1.34, "grad_norm": 2.0080645922255607, "learning_rate": 6.091248330707136e-06, "loss": 0.1265, "step": 4913 }, { "epoch": 1.34, "grad_norm": 1.851477172836219, "learning_rate": 6.089810128636173e-06, "loss": 0.1266, "step": 4914 }, { "epoch": 1.34, "grad_norm": 1.9372677265156553, "learning_rate": 6.088371831894057e-06, "loss": 0.1171, "step": 4915 }, { "epoch": 1.34, "grad_norm": 1.7032265818076067, "learning_rate": 6.086933440605733e-06, "loss": 0.1052, "step": 4916 }, { "epoch": 1.34, "grad_norm": 2.105536542212519, "learning_rate": 6.085494954896156e-06, "loss": 0.1508, "step": 4917 }, { "epoch": 1.34, "grad_norm": 2.000622536557249, "learning_rate": 6.0840563748902836e-06, "loss": 0.1152, "step": 4918 }, { "epoch": 1.34, "grad_norm": 2.275176361874426, "learning_rate": 6.082617700713083e-06, "loss": 0.1155, "step": 4919 }, { "epoch": 1.34, "grad_norm": 1.599289273385687, "learning_rate": 6.0811789324895365e-06, "loss": 0.1061, "step": 4920 }, { "epoch": 1.34, "grad_norm": 2.215750448800732, "learning_rate": 6.079740070344625e-06, "loss": 0.1605, "step": 4921 }, { "epoch": 1.34, "grad_norm": 2.2791099409385382, "learning_rate": 6.078301114403341e-06, "loss": 0.1437, "step": 4922 }, { "epoch": 1.34, "grad_norm": 1.762802683676954, "learning_rate": 6.07686206479069e-06, "loss": 0.1039, "step": 4923 }, { "epoch": 1.34, "grad_norm": 1.555928325814913, "learning_rate": 6.075422921631675e-06, "loss": 0.1071, "step": 4924 }, { "epoch": 1.34, "grad_norm": 2.077040906360435, "learning_rate": 6.073983685051319e-06, "loss": 0.1315, "step": 4925 }, { "epoch": 1.34, "grad_norm": 1.8635405108850642, "learning_rate": 6.0725443551746454e-06, "loss": 0.101, "step": 4926 }, { "epoch": 1.35, "grad_norm": 2.0995796453109206, "learning_rate": 6.071104932126687e-06, "loss": 0.1197, "step": 4927 }, { "epoch": 1.35, "grad_norm": 2.009879275551528, "learning_rate": 6.0696654160324875e-06, "loss": 0.1195, "step": 4928 }, { "epoch": 1.35, "grad_norm": 2.2620708792957065, "learning_rate": 6.068225807017096e-06, "loss": 0.1412, "step": 4929 }, { "epoch": 1.35, "grad_norm": 1.7952296679916147, "learning_rate": 6.06678610520557e-06, "loss": 0.1072, "step": 4930 }, { "epoch": 1.35, "grad_norm": 1.9705423440939835, "learning_rate": 6.065346310722976e-06, "loss": 0.116, "step": 4931 }, { "epoch": 1.35, "grad_norm": 1.6999090883129435, "learning_rate": 6.063906423694389e-06, "loss": 0.1066, "step": 4932 }, { "epoch": 1.35, "grad_norm": 1.8796616706033624, "learning_rate": 6.062466444244889e-06, "loss": 0.1243, "step": 4933 }, { "epoch": 1.35, "grad_norm": 2.0559884626043514, "learning_rate": 6.061026372499568e-06, "loss": 0.1116, "step": 4934 }, { "epoch": 1.35, "grad_norm": 1.8845821330534942, "learning_rate": 6.059586208583523e-06, "loss": 0.1235, "step": 4935 }, { "epoch": 1.35, "grad_norm": 1.7344141989550121, "learning_rate": 6.058145952621861e-06, "loss": 0.1058, "step": 4936 }, { "epoch": 1.35, "grad_norm": 1.8872765670986487, "learning_rate": 6.056705604739696e-06, "loss": 0.122, "step": 4937 }, { "epoch": 1.35, "grad_norm": 1.8218256328225666, "learning_rate": 6.055265165062149e-06, "loss": 0.1048, "step": 4938 }, { "epoch": 1.35, "grad_norm": 1.8714701326514385, "learning_rate": 6.053824633714352e-06, "loss": 0.1154, "step": 4939 }, { "epoch": 1.35, "grad_norm": 1.8014985656111493, "learning_rate": 6.0523840108214425e-06, "loss": 0.1073, "step": 4940 }, { "epoch": 1.35, "grad_norm": 1.855702892815786, "learning_rate": 6.0509432965085665e-06, "loss": 0.1098, "step": 4941 }, { "epoch": 1.35, "grad_norm": 2.017549518132829, "learning_rate": 6.049502490900877e-06, "loss": 0.1158, "step": 4942 }, { "epoch": 1.35, "grad_norm": 1.7800169097330127, "learning_rate": 6.048061594123536e-06, "loss": 0.0962, "step": 4943 }, { "epoch": 1.35, "grad_norm": 2.0495922912093616, "learning_rate": 6.046620606301716e-06, "loss": 0.1313, "step": 4944 }, { "epoch": 1.35, "grad_norm": 1.5866250909184794, "learning_rate": 6.045179527560592e-06, "loss": 0.0959, "step": 4945 }, { "epoch": 1.35, "grad_norm": 2.020444762927052, "learning_rate": 6.04373835802535e-06, "loss": 0.1187, "step": 4946 }, { "epoch": 1.35, "grad_norm": 2.0854377433145013, "learning_rate": 6.042297097821184e-06, "loss": 0.1223, "step": 4947 }, { "epoch": 1.35, "grad_norm": 1.882900900099412, "learning_rate": 6.040855747073294e-06, "loss": 0.1149, "step": 4948 }, { "epoch": 1.35, "grad_norm": 1.6224227318394695, "learning_rate": 6.039414305906892e-06, "loss": 0.099, "step": 4949 }, { "epoch": 1.35, "grad_norm": 1.9503745876084193, "learning_rate": 6.037972774447194e-06, "loss": 0.1221, "step": 4950 }, { "epoch": 1.35, "grad_norm": 1.6921267398935587, "learning_rate": 6.036531152819425e-06, "loss": 0.1125, "step": 4951 }, { "epoch": 1.35, "grad_norm": 2.0196253522305327, "learning_rate": 6.035089441148816e-06, "loss": 0.1113, "step": 4952 }, { "epoch": 1.35, "grad_norm": 1.9355851865094544, "learning_rate": 6.03364763956061e-06, "loss": 0.1123, "step": 4953 }, { "epoch": 1.35, "grad_norm": 1.9510864477490875, "learning_rate": 6.032205748180054e-06, "loss": 0.1187, "step": 4954 }, { "epoch": 1.35, "grad_norm": 1.73266849218343, "learning_rate": 6.030763767132406e-06, "loss": 0.0941, "step": 4955 }, { "epoch": 1.35, "grad_norm": 1.8752550765646894, "learning_rate": 6.0293216965429294e-06, "loss": 0.1155, "step": 4956 }, { "epoch": 1.35, "grad_norm": 1.9121570703098103, "learning_rate": 6.027879536536893e-06, "loss": 0.1186, "step": 4957 }, { "epoch": 1.35, "grad_norm": 2.3957029646050807, "learning_rate": 6.026437287239581e-06, "loss": 0.1403, "step": 4958 }, { "epoch": 1.35, "grad_norm": 1.7789354591976543, "learning_rate": 6.024994948776277e-06, "loss": 0.1085, "step": 4959 }, { "epoch": 1.35, "grad_norm": 1.7389394559466675, "learning_rate": 6.023552521272278e-06, "loss": 0.113, "step": 4960 }, { "epoch": 1.35, "grad_norm": 1.6298333586523461, "learning_rate": 6.0221100048528866e-06, "loss": 0.0952, "step": 4961 }, { "epoch": 1.35, "grad_norm": 1.7582323954970474, "learning_rate": 6.020667399643414e-06, "loss": 0.1094, "step": 4962 }, { "epoch": 1.35, "grad_norm": 2.070687729934323, "learning_rate": 6.019224705769176e-06, "loss": 0.115, "step": 4963 }, { "epoch": 1.36, "grad_norm": 1.9327608736164168, "learning_rate": 6.017781923355501e-06, "loss": 0.1198, "step": 4964 }, { "epoch": 1.36, "grad_norm": 1.9347310828801432, "learning_rate": 6.016339052527723e-06, "loss": 0.1198, "step": 4965 }, { "epoch": 1.36, "grad_norm": 1.9289071650990133, "learning_rate": 6.014896093411181e-06, "loss": 0.1145, "step": 4966 }, { "epoch": 1.36, "grad_norm": 1.8527529792686324, "learning_rate": 6.013453046131224e-06, "loss": 0.1157, "step": 4967 }, { "epoch": 1.36, "grad_norm": 1.8542076734063087, "learning_rate": 6.0120099108132126e-06, "loss": 0.1129, "step": 4968 }, { "epoch": 1.36, "grad_norm": 1.902671784114292, "learning_rate": 6.010566687582507e-06, "loss": 0.1201, "step": 4969 }, { "epoch": 1.36, "grad_norm": 1.6504671171049503, "learning_rate": 6.0091233765644796e-06, "loss": 0.1037, "step": 4970 }, { "epoch": 1.36, "grad_norm": 1.6160452605524753, "learning_rate": 6.0076799778845105e-06, "loss": 0.1063, "step": 4971 }, { "epoch": 1.36, "grad_norm": 2.002626443814734, "learning_rate": 6.0062364916679885e-06, "loss": 0.1222, "step": 4972 }, { "epoch": 1.36, "grad_norm": 1.8776185281144557, "learning_rate": 6.0047929180403065e-06, "loss": 0.1243, "step": 4973 }, { "epoch": 1.36, "grad_norm": 1.6494634514612223, "learning_rate": 6.003349257126867e-06, "loss": 0.0928, "step": 4974 }, { "epoch": 1.36, "grad_norm": 1.7440905885785756, "learning_rate": 6.00190550905308e-06, "loss": 0.1024, "step": 4975 }, { "epoch": 1.36, "grad_norm": 2.1378398123744247, "learning_rate": 6.000461673944364e-06, "loss": 0.15, "step": 4976 }, { "epoch": 1.36, "grad_norm": 2.0204688178815013, "learning_rate": 5.9990177519261435e-06, "loss": 0.12, "step": 4977 }, { "epoch": 1.36, "grad_norm": 2.1824224578324993, "learning_rate": 5.997573743123852e-06, "loss": 0.1366, "step": 4978 }, { "epoch": 1.36, "grad_norm": 1.5762183897756177, "learning_rate": 5.996129647662928e-06, "loss": 0.0829, "step": 4979 }, { "epoch": 1.36, "grad_norm": 2.0254442812204454, "learning_rate": 5.994685465668819e-06, "loss": 0.1214, "step": 4980 }, { "epoch": 1.36, "grad_norm": 2.080769381856775, "learning_rate": 5.993241197266982e-06, "loss": 0.1304, "step": 4981 }, { "epoch": 1.36, "grad_norm": 2.0758919167755088, "learning_rate": 5.99179684258288e-06, "loss": 0.1384, "step": 4982 }, { "epoch": 1.36, "grad_norm": 1.7571459561927143, "learning_rate": 5.990352401741981e-06, "loss": 0.1139, "step": 4983 }, { "epoch": 1.36, "grad_norm": 1.9482665749881678, "learning_rate": 5.988907874869764e-06, "loss": 0.133, "step": 4984 }, { "epoch": 1.36, "grad_norm": 1.9885995283699256, "learning_rate": 5.987463262091715e-06, "loss": 0.1233, "step": 4985 }, { "epoch": 1.36, "grad_norm": 1.7636513211660254, "learning_rate": 5.986018563533325e-06, "loss": 0.0794, "step": 4986 }, { "epoch": 1.36, "grad_norm": 2.0538612953934816, "learning_rate": 5.984573779320093e-06, "loss": 0.1286, "step": 4987 }, { "epoch": 1.36, "grad_norm": 1.9727186223260489, "learning_rate": 5.983128909577532e-06, "loss": 0.1469, "step": 4988 }, { "epoch": 1.36, "grad_norm": 1.7452652132484658, "learning_rate": 5.98168395443115e-06, "loss": 0.0996, "step": 4989 }, { "epoch": 1.36, "grad_norm": 1.885851806757247, "learning_rate": 5.980238914006473e-06, "loss": 0.1199, "step": 4990 }, { "epoch": 1.36, "grad_norm": 1.612526849537247, "learning_rate": 5.9787937884290325e-06, "loss": 0.1026, "step": 4991 }, { "epoch": 1.36, "grad_norm": 2.1189572322875163, "learning_rate": 5.977348577824362e-06, "loss": 0.1099, "step": 4992 }, { "epoch": 1.36, "grad_norm": 1.8267328505100635, "learning_rate": 5.975903282318009e-06, "loss": 0.1188, "step": 4993 }, { "epoch": 1.36, "grad_norm": 1.8256461121631937, "learning_rate": 5.974457902035524e-06, "loss": 0.1035, "step": 4994 }, { "epoch": 1.36, "grad_norm": 1.8914515794387323, "learning_rate": 5.973012437102466e-06, "loss": 0.1199, "step": 4995 }, { "epoch": 1.36, "grad_norm": 2.1919251568604503, "learning_rate": 5.971566887644401e-06, "loss": 0.1197, "step": 4996 }, { "epoch": 1.36, "grad_norm": 2.0191614139301737, "learning_rate": 5.970121253786907e-06, "loss": 0.1022, "step": 4997 }, { "epoch": 1.36, "grad_norm": 2.0038054881034335, "learning_rate": 5.96867553565556e-06, "loss": 0.1246, "step": 4998 }, { "epoch": 1.36, "grad_norm": 1.8980156628446445, "learning_rate": 5.967229733375952e-06, "loss": 0.123, "step": 4999 }, { "epoch": 1.37, "grad_norm": 1.9227323869861515, "learning_rate": 5.965783847073679e-06, "loss": 0.106, "step": 5000 }, { "epoch": 1.37, "grad_norm": 1.8243048521610479, "learning_rate": 5.964337876874343e-06, "loss": 0.1196, "step": 5001 }, { "epoch": 1.37, "grad_norm": 1.6680419197272611, "learning_rate": 5.962891822903555e-06, "loss": 0.1019, "step": 5002 }, { "epoch": 1.37, "grad_norm": 1.8450948812067482, "learning_rate": 5.961445685286933e-06, "loss": 0.1146, "step": 5003 }, { "epoch": 1.37, "grad_norm": 2.108562972689307, "learning_rate": 5.959999464150101e-06, "loss": 0.1617, "step": 5004 }, { "epoch": 1.37, "grad_norm": 1.9499365554759507, "learning_rate": 5.958553159618693e-06, "loss": 0.1079, "step": 5005 }, { "epoch": 1.37, "grad_norm": 1.8859355103291315, "learning_rate": 5.957106771818348e-06, "loss": 0.1222, "step": 5006 }, { "epoch": 1.37, "grad_norm": 1.8315372473982805, "learning_rate": 5.955660300874712e-06, "loss": 0.1146, "step": 5007 }, { "epoch": 1.37, "grad_norm": 2.3307066437226154, "learning_rate": 5.9542137469134405e-06, "loss": 0.1355, "step": 5008 }, { "epoch": 1.37, "grad_norm": 2.0522661107552698, "learning_rate": 5.9527671100601956e-06, "loss": 0.1228, "step": 5009 }, { "epoch": 1.37, "grad_norm": 1.8579057079758383, "learning_rate": 5.951320390440642e-06, "loss": 0.1396, "step": 5010 }, { "epoch": 1.37, "grad_norm": 1.8855469724348655, "learning_rate": 5.949873588180458e-06, "loss": 0.1171, "step": 5011 }, { "epoch": 1.37, "grad_norm": 2.144756090238146, "learning_rate": 5.948426703405327e-06, "loss": 0.1224, "step": 5012 }, { "epoch": 1.37, "grad_norm": 2.1342181296058187, "learning_rate": 5.946979736240938e-06, "loss": 0.1581, "step": 5013 }, { "epoch": 1.37, "grad_norm": 1.7121265361196136, "learning_rate": 5.945532686812987e-06, "loss": 0.117, "step": 5014 }, { "epoch": 1.37, "grad_norm": 1.810679433667134, "learning_rate": 5.944085555247181e-06, "loss": 0.1174, "step": 5015 }, { "epoch": 1.37, "grad_norm": 1.6941252956226815, "learning_rate": 5.94263834166923e-06, "loss": 0.1008, "step": 5016 }, { "epoch": 1.37, "grad_norm": 2.074915230490796, "learning_rate": 5.941191046204851e-06, "loss": 0.1227, "step": 5017 }, { "epoch": 1.37, "grad_norm": 1.8541399783188595, "learning_rate": 5.939743668979774e-06, "loss": 0.133, "step": 5018 }, { "epoch": 1.37, "grad_norm": 2.188606767937708, "learning_rate": 5.938296210119727e-06, "loss": 0.141, "step": 5019 }, { "epoch": 1.37, "grad_norm": 1.7683583488225754, "learning_rate": 5.9368486697504525e-06, "loss": 0.098, "step": 5020 }, { "epoch": 1.37, "grad_norm": 1.950058543180246, "learning_rate": 5.935401047997697e-06, "loss": 0.1345, "step": 5021 }, { "epoch": 1.37, "grad_norm": 1.931108125559404, "learning_rate": 5.933953344987215e-06, "loss": 0.1246, "step": 5022 }, { "epoch": 1.37, "grad_norm": 2.0354671410815186, "learning_rate": 5.932505560844766e-06, "loss": 0.104, "step": 5023 }, { "epoch": 1.37, "grad_norm": 2.022780983806588, "learning_rate": 5.93105769569612e-06, "loss": 0.136, "step": 5024 }, { "epoch": 1.37, "grad_norm": 1.9322876665778022, "learning_rate": 5.929609749667052e-06, "loss": 0.1095, "step": 5025 }, { "epoch": 1.37, "grad_norm": 2.1609916934143016, "learning_rate": 5.928161722883341e-06, "loss": 0.1399, "step": 5026 }, { "epoch": 1.37, "grad_norm": 1.851747300715781, "learning_rate": 5.926713615470781e-06, "loss": 0.119, "step": 5027 }, { "epoch": 1.37, "grad_norm": 1.988226219250091, "learning_rate": 5.925265427555166e-06, "loss": 0.1432, "step": 5028 }, { "epoch": 1.37, "grad_norm": 1.9521867454549575, "learning_rate": 5.923817159262297e-06, "loss": 0.1206, "step": 5029 }, { "epoch": 1.37, "grad_norm": 1.8559110066631774, "learning_rate": 5.922368810717989e-06, "loss": 0.1126, "step": 5030 }, { "epoch": 1.37, "grad_norm": 1.7608750355664113, "learning_rate": 5.9209203820480555e-06, "loss": 0.1328, "step": 5031 }, { "epoch": 1.37, "grad_norm": 1.7977843451529179, "learning_rate": 5.919471873378322e-06, "loss": 0.115, "step": 5032 }, { "epoch": 1.37, "grad_norm": 2.059582824376823, "learning_rate": 5.91802328483462e-06, "loss": 0.1407, "step": 5033 }, { "epoch": 1.37, "grad_norm": 1.7589401785545038, "learning_rate": 5.916574616542785e-06, "loss": 0.0937, "step": 5034 }, { "epoch": 1.37, "grad_norm": 1.8464722292905205, "learning_rate": 5.915125868628664e-06, "loss": 0.093, "step": 5035 }, { "epoch": 1.37, "grad_norm": 1.91809140446907, "learning_rate": 5.913677041218111e-06, "loss": 0.1277, "step": 5036 }, { "epoch": 1.38, "grad_norm": 2.147611748890377, "learning_rate": 5.912228134436979e-06, "loss": 0.1223, "step": 5037 }, { "epoch": 1.38, "grad_norm": 1.7223954603002656, "learning_rate": 5.910779148411139e-06, "loss": 0.1007, "step": 5038 }, { "epoch": 1.38, "grad_norm": 1.6435248779725946, "learning_rate": 5.9093300832664625e-06, "loss": 0.1072, "step": 5039 }, { "epoch": 1.38, "grad_norm": 1.8316810571933446, "learning_rate": 5.907880939128826e-06, "loss": 0.1239, "step": 5040 }, { "epoch": 1.38, "grad_norm": 2.0288153789314536, "learning_rate": 5.9064317161241185e-06, "loss": 0.1241, "step": 5041 }, { "epoch": 1.38, "grad_norm": 1.8976503491707963, "learning_rate": 5.904982414378233e-06, "loss": 0.1357, "step": 5042 }, { "epoch": 1.38, "grad_norm": 2.177573045228236, "learning_rate": 5.903533034017068e-06, "loss": 0.1226, "step": 5043 }, { "epoch": 1.38, "grad_norm": 1.7863708962816582, "learning_rate": 5.902083575166532e-06, "loss": 0.095, "step": 5044 }, { "epoch": 1.38, "grad_norm": 1.9578563020406434, "learning_rate": 5.900634037952537e-06, "loss": 0.1159, "step": 5045 }, { "epoch": 1.38, "grad_norm": 1.759787605817178, "learning_rate": 5.899184422501005e-06, "loss": 0.1091, "step": 5046 }, { "epoch": 1.38, "grad_norm": 1.9860180552598778, "learning_rate": 5.897734728937863e-06, "loss": 0.1189, "step": 5047 }, { "epoch": 1.38, "grad_norm": 1.822050993233891, "learning_rate": 5.896284957389042e-06, "loss": 0.0991, "step": 5048 }, { "epoch": 1.38, "grad_norm": 2.13954128838974, "learning_rate": 5.8948351079804875e-06, "loss": 0.1257, "step": 5049 }, { "epoch": 1.38, "grad_norm": 1.8856741099998384, "learning_rate": 5.893385180838144e-06, "loss": 0.1154, "step": 5050 }, { "epoch": 1.38, "grad_norm": 1.655622211696462, "learning_rate": 5.891935176087967e-06, "loss": 0.102, "step": 5051 }, { "epoch": 1.38, "grad_norm": 1.8962847087911745, "learning_rate": 5.890485093855916e-06, "loss": 0.1319, "step": 5052 }, { "epoch": 1.38, "grad_norm": 2.1459880049209663, "learning_rate": 5.889034934267962e-06, "loss": 0.1334, "step": 5053 }, { "epoch": 1.38, "grad_norm": 1.9619211554407519, "learning_rate": 5.887584697450075e-06, "loss": 0.1281, "step": 5054 }, { "epoch": 1.38, "grad_norm": 1.855940385075262, "learning_rate": 5.88613438352824e-06, "loss": 0.1217, "step": 5055 }, { "epoch": 1.38, "grad_norm": 1.886327747691445, "learning_rate": 5.8846839926284435e-06, "loss": 0.1213, "step": 5056 }, { "epoch": 1.38, "grad_norm": 1.8263491048157114, "learning_rate": 5.883233524876681e-06, "loss": 0.0998, "step": 5057 }, { "epoch": 1.38, "grad_norm": 1.4613830994080448, "learning_rate": 5.88178298039895e-06, "loss": 0.0851, "step": 5058 }, { "epoch": 1.38, "grad_norm": 2.1843487471030403, "learning_rate": 5.880332359321264e-06, "loss": 0.1329, "step": 5059 }, { "epoch": 1.38, "grad_norm": 2.02411749728561, "learning_rate": 5.878881661769633e-06, "loss": 0.1152, "step": 5060 }, { "epoch": 1.38, "grad_norm": 2.305011313037653, "learning_rate": 5.877430887870081e-06, "loss": 0.1385, "step": 5061 }, { "epoch": 1.38, "grad_norm": 1.972171209393961, "learning_rate": 5.875980037748635e-06, "loss": 0.1591, "step": 5062 }, { "epoch": 1.38, "grad_norm": 1.992975974697504, "learning_rate": 5.87452911153133e-06, "loss": 0.1253, "step": 5063 }, { "epoch": 1.38, "grad_norm": 1.7498121276083862, "learning_rate": 5.873078109344204e-06, "loss": 0.1072, "step": 5064 }, { "epoch": 1.38, "grad_norm": 1.64478378218017, "learning_rate": 5.871627031313311e-06, "loss": 0.1078, "step": 5065 }, { "epoch": 1.38, "grad_norm": 2.0786553141524453, "learning_rate": 5.870175877564699e-06, "loss": 0.1197, "step": 5066 }, { "epoch": 1.38, "grad_norm": 1.536810066594668, "learning_rate": 5.8687246482244306e-06, "loss": 0.08, "step": 5067 }, { "epoch": 1.38, "grad_norm": 2.007317162977653, "learning_rate": 5.867273343418577e-06, "loss": 0.1375, "step": 5068 }, { "epoch": 1.38, "grad_norm": 1.823498260559666, "learning_rate": 5.865821963273206e-06, "loss": 0.1072, "step": 5069 }, { "epoch": 1.38, "grad_norm": 1.9448057679008224, "learning_rate": 5.864370507914403e-06, "loss": 0.1272, "step": 5070 }, { "epoch": 1.38, "grad_norm": 1.7932060406569077, "learning_rate": 5.8629189774682524e-06, "loss": 0.0989, "step": 5071 }, { "epoch": 1.38, "grad_norm": 2.173016208328216, "learning_rate": 5.8614673720608495e-06, "loss": 0.1314, "step": 5072 }, { "epoch": 1.38, "grad_norm": 1.8715303874180422, "learning_rate": 5.860015691818292e-06, "loss": 0.1143, "step": 5073 }, { "epoch": 1.39, "grad_norm": 1.911421442866843, "learning_rate": 5.858563936866691e-06, "loss": 0.1387, "step": 5074 }, { "epoch": 1.39, "grad_norm": 1.6210919890453315, "learning_rate": 5.857112107332155e-06, "loss": 0.1096, "step": 5075 }, { "epoch": 1.39, "grad_norm": 2.2499598607131857, "learning_rate": 5.855660203340804e-06, "loss": 0.1141, "step": 5076 }, { "epoch": 1.39, "grad_norm": 2.101609077176601, "learning_rate": 5.854208225018767e-06, "loss": 0.1293, "step": 5077 }, { "epoch": 1.39, "grad_norm": 1.8359219084747167, "learning_rate": 5.8527561724921735e-06, "loss": 0.1056, "step": 5078 }, { "epoch": 1.39, "grad_norm": 1.7277293827184643, "learning_rate": 5.851304045887164e-06, "loss": 0.0963, "step": 5079 }, { "epoch": 1.39, "grad_norm": 1.9781716158758402, "learning_rate": 5.849851845329884e-06, "loss": 0.1213, "step": 5080 }, { "epoch": 1.39, "grad_norm": 1.710632924115769, "learning_rate": 5.8483995709464845e-06, "loss": 0.0971, "step": 5081 }, { "epoch": 1.39, "grad_norm": 1.9650019287849914, "learning_rate": 5.846947222863123e-06, "loss": 0.1401, "step": 5082 }, { "epoch": 1.39, "grad_norm": 1.7432319442192512, "learning_rate": 5.845494801205967e-06, "loss": 0.0989, "step": 5083 }, { "epoch": 1.39, "grad_norm": 2.11468034896895, "learning_rate": 5.844042306101184e-06, "loss": 0.1436, "step": 5084 }, { "epoch": 1.39, "grad_norm": 2.058623735536554, "learning_rate": 5.842589737674954e-06, "loss": 0.1328, "step": 5085 }, { "epoch": 1.39, "grad_norm": 1.8170427656664632, "learning_rate": 5.841137096053459e-06, "loss": 0.1073, "step": 5086 }, { "epoch": 1.39, "grad_norm": 1.6444283767521197, "learning_rate": 5.839684381362891e-06, "loss": 0.1059, "step": 5087 }, { "epoch": 1.39, "grad_norm": 2.0231515383693486, "learning_rate": 5.8382315937294444e-06, "loss": 0.1351, "step": 5088 }, { "epoch": 1.39, "grad_norm": 1.7531764334516389, "learning_rate": 5.836778733279322e-06, "loss": 0.1005, "step": 5089 }, { "epoch": 1.39, "grad_norm": 1.8506491627549855, "learning_rate": 5.835325800138736e-06, "loss": 0.1313, "step": 5090 }, { "epoch": 1.39, "grad_norm": 1.8006825521940102, "learning_rate": 5.833872794433897e-06, "loss": 0.1082, "step": 5091 }, { "epoch": 1.39, "grad_norm": 1.8237788117541966, "learning_rate": 5.832419716291031e-06, "loss": 0.1033, "step": 5092 }, { "epoch": 1.39, "grad_norm": 2.1470922232331353, "learning_rate": 5.830966565836365e-06, "loss": 0.1338, "step": 5093 }, { "epoch": 1.39, "grad_norm": 2.1695064850909884, "learning_rate": 5.829513343196132e-06, "loss": 0.1301, "step": 5094 }, { "epoch": 1.39, "grad_norm": 2.1051031273457044, "learning_rate": 5.828060048496573e-06, "loss": 0.1345, "step": 5095 }, { "epoch": 1.39, "grad_norm": 2.1356332004517324, "learning_rate": 5.826606681863934e-06, "loss": 0.1337, "step": 5096 }, { "epoch": 1.39, "grad_norm": 1.6984421975735762, "learning_rate": 5.825153243424471e-06, "loss": 0.1099, "step": 5097 }, { "epoch": 1.39, "grad_norm": 2.050796456952679, "learning_rate": 5.823699733304441e-06, "loss": 0.1381, "step": 5098 }, { "epoch": 1.39, "grad_norm": 1.764124458219414, "learning_rate": 5.822246151630109e-06, "loss": 0.1155, "step": 5099 }, { "epoch": 1.39, "grad_norm": 3.3479149520313665, "learning_rate": 5.820792498527749e-06, "loss": 0.1685, "step": 5100 }, { "epoch": 1.39, "grad_norm": 1.7759005018329987, "learning_rate": 5.819338774123638e-06, "loss": 0.1239, "step": 5101 }, { "epoch": 1.39, "grad_norm": 1.883729435899512, "learning_rate": 5.81788497854406e-06, "loss": 0.1249, "step": 5102 }, { "epoch": 1.39, "grad_norm": 1.841413161515065, "learning_rate": 5.816431111915304e-06, "loss": 0.1189, "step": 5103 }, { "epoch": 1.39, "grad_norm": 1.8217901430142631, "learning_rate": 5.8149771743636675e-06, "loss": 0.1285, "step": 5104 }, { "epoch": 1.39, "grad_norm": 1.8284336105426517, "learning_rate": 5.813523166015455e-06, "loss": 0.1178, "step": 5105 }, { "epoch": 1.39, "grad_norm": 2.0543267234027347, "learning_rate": 5.812069086996972e-06, "loss": 0.1294, "step": 5106 }, { "epoch": 1.39, "grad_norm": 1.8859240779560933, "learning_rate": 5.810614937434537e-06, "loss": 0.1175, "step": 5107 }, { "epoch": 1.39, "grad_norm": 1.9975831709056648, "learning_rate": 5.8091607174544695e-06, "loss": 0.1269, "step": 5108 }, { "epoch": 1.39, "grad_norm": 1.9169591813566134, "learning_rate": 5.807706427183096e-06, "loss": 0.1166, "step": 5109 }, { "epoch": 1.4, "grad_norm": 1.858687849558745, "learning_rate": 5.806252066746751e-06, "loss": 0.1245, "step": 5110 }, { "epoch": 1.4, "grad_norm": 1.996083827760046, "learning_rate": 5.804797636271772e-06, "loss": 0.1335, "step": 5111 }, { "epoch": 1.4, "grad_norm": 1.7409289111680077, "learning_rate": 5.803343135884507e-06, "loss": 0.1098, "step": 5112 }, { "epoch": 1.4, "grad_norm": 2.0347170804943717, "learning_rate": 5.801888565711308e-06, "loss": 0.1353, "step": 5113 }, { "epoch": 1.4, "grad_norm": 1.7591855239442908, "learning_rate": 5.8004339258785296e-06, "loss": 0.1007, "step": 5114 }, { "epoch": 1.4, "grad_norm": 1.8522143084981304, "learning_rate": 5.798979216512536e-06, "loss": 0.1125, "step": 5115 }, { "epoch": 1.4, "grad_norm": 1.8438428593620593, "learning_rate": 5.797524437739699e-06, "loss": 0.1083, "step": 5116 }, { "epoch": 1.4, "grad_norm": 1.9663445844545666, "learning_rate": 5.796069589686393e-06, "loss": 0.1201, "step": 5117 }, { "epoch": 1.4, "grad_norm": 1.7869958705209372, "learning_rate": 5.794614672479e-06, "loss": 0.1215, "step": 5118 }, { "epoch": 1.4, "grad_norm": 2.1040768537238215, "learning_rate": 5.793159686243908e-06, "loss": 0.1156, "step": 5119 }, { "epoch": 1.4, "grad_norm": 1.9073501656739287, "learning_rate": 5.791704631107511e-06, "loss": 0.1279, "step": 5120 }, { "epoch": 1.4, "grad_norm": 1.6889405649149396, "learning_rate": 5.790249507196207e-06, "loss": 0.104, "step": 5121 }, { "epoch": 1.4, "grad_norm": 2.0225453173415753, "learning_rate": 5.7887943146364045e-06, "loss": 0.1335, "step": 5122 }, { "epoch": 1.4, "grad_norm": 1.8392982426845952, "learning_rate": 5.787339053554512e-06, "loss": 0.0912, "step": 5123 }, { "epoch": 1.4, "grad_norm": 2.304125827390601, "learning_rate": 5.78588372407695e-06, "loss": 0.1409, "step": 5124 }, { "epoch": 1.4, "grad_norm": 2.128773474822439, "learning_rate": 5.784428326330143e-06, "loss": 0.1308, "step": 5125 }, { "epoch": 1.4, "grad_norm": 1.9968719846992482, "learning_rate": 5.782972860440517e-06, "loss": 0.1358, "step": 5126 }, { "epoch": 1.4, "grad_norm": 1.8717743962610498, "learning_rate": 5.781517326534509e-06, "loss": 0.1249, "step": 5127 }, { "epoch": 1.4, "grad_norm": 1.8655682885447384, "learning_rate": 5.780061724738559e-06, "loss": 0.1248, "step": 5128 }, { "epoch": 1.4, "grad_norm": 1.7267417061042534, "learning_rate": 5.778606055179117e-06, "loss": 0.0936, "step": 5129 }, { "epoch": 1.4, "grad_norm": 1.7412119215793365, "learning_rate": 5.777150317982636e-06, "loss": 0.1023, "step": 5130 }, { "epoch": 1.4, "grad_norm": 2.0767667159637377, "learning_rate": 5.7756945132755715e-06, "loss": 0.1144, "step": 5131 }, { "epoch": 1.4, "grad_norm": 2.124782594984012, "learning_rate": 5.774238641184391e-06, "loss": 0.149, "step": 5132 }, { "epoch": 1.4, "grad_norm": 2.2121331224697167, "learning_rate": 5.7727827018355665e-06, "loss": 0.13, "step": 5133 }, { "epoch": 1.4, "grad_norm": 1.6512234751839003, "learning_rate": 5.771326695355573e-06, "loss": 0.0849, "step": 5134 }, { "epoch": 1.4, "grad_norm": 1.8300428370996042, "learning_rate": 5.76987062187089e-06, "loss": 0.1235, "step": 5135 }, { "epoch": 1.4, "grad_norm": 1.7915115070904726, "learning_rate": 5.768414481508011e-06, "loss": 0.1095, "step": 5136 }, { "epoch": 1.4, "grad_norm": 1.8695414499671794, "learning_rate": 5.766958274393428e-06, "loss": 0.1046, "step": 5137 }, { "epoch": 1.4, "grad_norm": 2.038623042397329, "learning_rate": 5.765502000653639e-06, "loss": 0.1391, "step": 5138 }, { "epoch": 1.4, "grad_norm": 1.7753507956216645, "learning_rate": 5.764045660415153e-06, "loss": 0.1336, "step": 5139 }, { "epoch": 1.4, "grad_norm": 1.6353312881382196, "learning_rate": 5.762589253804478e-06, "loss": 0.095, "step": 5140 }, { "epoch": 1.4, "grad_norm": 1.8982971180945565, "learning_rate": 5.761132780948132e-06, "loss": 0.1267, "step": 5141 }, { "epoch": 1.4, "grad_norm": 1.6510989612078975, "learning_rate": 5.75967624197264e-06, "loss": 0.0979, "step": 5142 }, { "epoch": 1.4, "grad_norm": 1.9611921514073136, "learning_rate": 5.758219637004529e-06, "loss": 0.1342, "step": 5143 }, { "epoch": 1.4, "grad_norm": 1.7749491898904008, "learning_rate": 5.756762966170334e-06, "loss": 0.1204, "step": 5144 }, { "epoch": 1.4, "grad_norm": 2.08518080656657, "learning_rate": 5.755306229596594e-06, "loss": 0.1094, "step": 5145 }, { "epoch": 1.4, "grad_norm": 1.8935107554233241, "learning_rate": 5.753849427409857e-06, "loss": 0.1295, "step": 5146 }, { "epoch": 1.41, "grad_norm": 1.9037261979553235, "learning_rate": 5.752392559736671e-06, "loss": 0.1196, "step": 5147 }, { "epoch": 1.41, "grad_norm": 2.0466734713293575, "learning_rate": 5.750935626703598e-06, "loss": 0.1136, "step": 5148 }, { "epoch": 1.41, "grad_norm": 1.7773326058936851, "learning_rate": 5.749478628437196e-06, "loss": 0.102, "step": 5149 }, { "epoch": 1.41, "grad_norm": 1.972591120865003, "learning_rate": 5.748021565064037e-06, "loss": 0.1307, "step": 5150 }, { "epoch": 1.41, "grad_norm": 1.8704000697522698, "learning_rate": 5.746564436710694e-06, "loss": 0.1203, "step": 5151 }, { "epoch": 1.41, "grad_norm": 2.016668253341138, "learning_rate": 5.745107243503747e-06, "loss": 0.1317, "step": 5152 }, { "epoch": 1.41, "grad_norm": 1.8615736932006264, "learning_rate": 5.74364998556978e-06, "loss": 0.1192, "step": 5153 }, { "epoch": 1.41, "grad_norm": 1.821764223084276, "learning_rate": 5.742192663035388e-06, "loss": 0.1217, "step": 5154 }, { "epoch": 1.41, "grad_norm": 2.078251197216359, "learning_rate": 5.740735276027164e-06, "loss": 0.1228, "step": 5155 }, { "epoch": 1.41, "grad_norm": 2.0492589209062237, "learning_rate": 5.739277824671711e-06, "loss": 0.1211, "step": 5156 }, { "epoch": 1.41, "grad_norm": 1.9216932825568611, "learning_rate": 5.737820309095639e-06, "loss": 0.1148, "step": 5157 }, { "epoch": 1.41, "grad_norm": 1.931295093359262, "learning_rate": 5.736362729425558e-06, "loss": 0.1199, "step": 5158 }, { "epoch": 1.41, "grad_norm": 1.923224287788985, "learning_rate": 5.734905085788091e-06, "loss": 0.1226, "step": 5159 }, { "epoch": 1.41, "grad_norm": 1.9387714382958043, "learning_rate": 5.733447378309861e-06, "loss": 0.1254, "step": 5160 }, { "epoch": 1.41, "grad_norm": 1.7823131147660431, "learning_rate": 5.731989607117497e-06, "loss": 0.1001, "step": 5161 }, { "epoch": 1.41, "grad_norm": 1.9993065746969982, "learning_rate": 5.730531772337634e-06, "loss": 0.1239, "step": 5162 }, { "epoch": 1.41, "grad_norm": 1.9201976216481262, "learning_rate": 5.729073874096917e-06, "loss": 0.1192, "step": 5163 }, { "epoch": 1.41, "grad_norm": 2.6441507211020685, "learning_rate": 5.72761591252199e-06, "loss": 0.1298, "step": 5164 }, { "epoch": 1.41, "grad_norm": 2.139222525703095, "learning_rate": 5.726157887739505e-06, "loss": 0.147, "step": 5165 }, { "epoch": 1.41, "grad_norm": 1.9035990479721026, "learning_rate": 5.724699799876124e-06, "loss": 0.1252, "step": 5166 }, { "epoch": 1.41, "grad_norm": 1.8101621125422658, "learning_rate": 5.723241649058503e-06, "loss": 0.1125, "step": 5167 }, { "epoch": 1.41, "grad_norm": 2.171727132080312, "learning_rate": 5.721783435413315e-06, "loss": 0.1176, "step": 5168 }, { "epoch": 1.41, "grad_norm": 1.8652802140485323, "learning_rate": 5.7203251590672345e-06, "loss": 0.1229, "step": 5169 }, { "epoch": 1.41, "grad_norm": 2.0042443396582086, "learning_rate": 5.71886682014694e-06, "loss": 0.1226, "step": 5170 }, { "epoch": 1.41, "grad_norm": 1.7492718707986108, "learning_rate": 5.7174084187791165e-06, "loss": 0.1144, "step": 5171 }, { "epoch": 1.41, "grad_norm": 2.0745748578950893, "learning_rate": 5.715949955090456e-06, "loss": 0.1201, "step": 5172 }, { "epoch": 1.41, "grad_norm": 1.8303897475427093, "learning_rate": 5.714491429207651e-06, "loss": 0.1123, "step": 5173 }, { "epoch": 1.41, "grad_norm": 1.9188090421231063, "learning_rate": 5.713032841257407e-06, "loss": 0.1176, "step": 5174 }, { "epoch": 1.41, "grad_norm": 1.661572595767909, "learning_rate": 5.711574191366427e-06, "loss": 0.1141, "step": 5175 }, { "epoch": 1.41, "grad_norm": 1.7389120063191656, "learning_rate": 5.710115479661425e-06, "loss": 0.1078, "step": 5176 }, { "epoch": 1.41, "grad_norm": 1.7369512408618193, "learning_rate": 5.708656706269117e-06, "loss": 0.1037, "step": 5177 }, { "epoch": 1.41, "grad_norm": 1.9359516778878942, "learning_rate": 5.707197871316228e-06, "loss": 0.1277, "step": 5178 }, { "epoch": 1.41, "grad_norm": 2.335203195175909, "learning_rate": 5.705738974929484e-06, "loss": 0.1544, "step": 5179 }, { "epoch": 1.41, "grad_norm": 1.6346588492619258, "learning_rate": 5.70428001723562e-06, "loss": 0.0881, "step": 5180 }, { "epoch": 1.41, "grad_norm": 1.695901369889061, "learning_rate": 5.702820998361374e-06, "loss": 0.1018, "step": 5181 }, { "epoch": 1.41, "grad_norm": 1.5519986490440538, "learning_rate": 5.701361918433489e-06, "loss": 0.0934, "step": 5182 }, { "epoch": 1.41, "grad_norm": 2.1031870955854868, "learning_rate": 5.699902777578716e-06, "loss": 0.1203, "step": 5183 }, { "epoch": 1.42, "grad_norm": 2.1861747085122927, "learning_rate": 5.69844357592381e-06, "loss": 0.1411, "step": 5184 }, { "epoch": 1.42, "grad_norm": 2.0106468313162167, "learning_rate": 5.696984313595529e-06, "loss": 0.1267, "step": 5185 }, { "epoch": 1.42, "grad_norm": 1.8232743567063099, "learning_rate": 5.69552499072064e-06, "loss": 0.0933, "step": 5186 }, { "epoch": 1.42, "grad_norm": 1.5936963866086897, "learning_rate": 5.694065607425914e-06, "loss": 0.0904, "step": 5187 }, { "epoch": 1.42, "grad_norm": 2.1360558712143707, "learning_rate": 5.692606163838125e-06, "loss": 0.1279, "step": 5188 }, { "epoch": 1.42, "grad_norm": 2.195234164364305, "learning_rate": 5.6911466600840535e-06, "loss": 0.105, "step": 5189 }, { "epoch": 1.42, "grad_norm": 1.9116659246370282, "learning_rate": 5.689687096290488e-06, "loss": 0.1258, "step": 5190 }, { "epoch": 1.42, "grad_norm": 1.6544165835655402, "learning_rate": 5.688227472584218e-06, "loss": 0.0969, "step": 5191 }, { "epoch": 1.42, "grad_norm": 1.9026982124008578, "learning_rate": 5.686767789092041e-06, "loss": 0.1086, "step": 5192 }, { "epoch": 1.42, "grad_norm": 2.0786105027061814, "learning_rate": 5.68530804594076e-06, "loss": 0.1317, "step": 5193 }, { "epoch": 1.42, "grad_norm": 2.2290110759518957, "learning_rate": 5.683848243257181e-06, "loss": 0.1141, "step": 5194 }, { "epoch": 1.42, "grad_norm": 1.748043193744473, "learning_rate": 5.682388381168115e-06, "loss": 0.1214, "step": 5195 }, { "epoch": 1.42, "grad_norm": 1.9772083775253568, "learning_rate": 5.68092845980038e-06, "loss": 0.1161, "step": 5196 }, { "epoch": 1.42, "grad_norm": 1.9116313747404785, "learning_rate": 5.679468479280798e-06, "loss": 0.1161, "step": 5197 }, { "epoch": 1.42, "grad_norm": 1.9262304952269778, "learning_rate": 5.678008439736198e-06, "loss": 0.1099, "step": 5198 }, { "epoch": 1.42, "grad_norm": 2.029175851506085, "learning_rate": 5.6765483412934144e-06, "loss": 0.1344, "step": 5199 }, { "epoch": 1.42, "grad_norm": 1.7106856206631176, "learning_rate": 5.67508818407928e-06, "loss": 0.1052, "step": 5200 }, { "epoch": 1.42, "grad_norm": 1.79970087680083, "learning_rate": 5.673627968220642e-06, "loss": 0.1058, "step": 5201 }, { "epoch": 1.42, "grad_norm": 1.7264443631193565, "learning_rate": 5.672167693844348e-06, "loss": 0.105, "step": 5202 }, { "epoch": 1.42, "grad_norm": 1.5255982165042767, "learning_rate": 5.670707361077249e-06, "loss": 0.0941, "step": 5203 }, { "epoch": 1.42, "grad_norm": 1.8350066059754944, "learning_rate": 5.669246970046206e-06, "loss": 0.115, "step": 5204 }, { "epoch": 1.42, "grad_norm": 1.8149584151390863, "learning_rate": 5.667786520878079e-06, "loss": 0.1177, "step": 5205 }, { "epoch": 1.42, "grad_norm": 2.123866810707329, "learning_rate": 5.666326013699739e-06, "loss": 0.1476, "step": 5206 }, { "epoch": 1.42, "grad_norm": 1.8146274675873144, "learning_rate": 5.664865448638059e-06, "loss": 0.1067, "step": 5207 }, { "epoch": 1.42, "grad_norm": 1.8350339308023162, "learning_rate": 5.663404825819916e-06, "loss": 0.1035, "step": 5208 }, { "epoch": 1.42, "grad_norm": 1.8973912403158681, "learning_rate": 5.661944145372193e-06, "loss": 0.1168, "step": 5209 }, { "epoch": 1.42, "grad_norm": 2.0352549219325153, "learning_rate": 5.660483407421783e-06, "loss": 0.1231, "step": 5210 }, { "epoch": 1.42, "grad_norm": 1.7064762501848323, "learning_rate": 5.659022612095575e-06, "loss": 0.1123, "step": 5211 }, { "epoch": 1.42, "grad_norm": 1.7056421520573755, "learning_rate": 5.657561759520467e-06, "loss": 0.1092, "step": 5212 }, { "epoch": 1.42, "grad_norm": 2.061808326111, "learning_rate": 5.656100849823366e-06, "loss": 0.1299, "step": 5213 }, { "epoch": 1.42, "grad_norm": 1.820253565660934, "learning_rate": 5.6546398831311774e-06, "loss": 0.1085, "step": 5214 }, { "epoch": 1.42, "grad_norm": 2.066764402401858, "learning_rate": 5.6531788595708155e-06, "loss": 0.112, "step": 5215 }, { "epoch": 1.42, "grad_norm": 2.221039780122519, "learning_rate": 5.6517177792692005e-06, "loss": 0.1232, "step": 5216 }, { "epoch": 1.42, "grad_norm": 1.8134641770559579, "learning_rate": 5.650256642353251e-06, "loss": 0.1184, "step": 5217 }, { "epoch": 1.42, "grad_norm": 1.9443455206721616, "learning_rate": 5.648795448949898e-06, "loss": 0.1362, "step": 5218 }, { "epoch": 1.42, "grad_norm": 2.114599231343073, "learning_rate": 5.6473341991860755e-06, "loss": 0.1461, "step": 5219 }, { "epoch": 1.43, "grad_norm": 1.8069603848787623, "learning_rate": 5.645872893188718e-06, "loss": 0.111, "step": 5220 }, { "epoch": 1.43, "grad_norm": 1.774438468258415, "learning_rate": 5.644411531084771e-06, "loss": 0.1212, "step": 5221 }, { "epoch": 1.43, "grad_norm": 1.8343346473995494, "learning_rate": 5.642950113001183e-06, "loss": 0.1286, "step": 5222 }, { "epoch": 1.43, "grad_norm": 1.850253466989269, "learning_rate": 5.641488639064904e-06, "loss": 0.1255, "step": 5223 }, { "epoch": 1.43, "grad_norm": 1.853881721111044, "learning_rate": 5.640027109402892e-06, "loss": 0.1144, "step": 5224 }, { "epoch": 1.43, "grad_norm": 1.750781899846873, "learning_rate": 5.638565524142111e-06, "loss": 0.0837, "step": 5225 }, { "epoch": 1.43, "grad_norm": 1.7737824940329674, "learning_rate": 5.637103883409525e-06, "loss": 0.0917, "step": 5226 }, { "epoch": 1.43, "grad_norm": 2.166112960748287, "learning_rate": 5.635642187332108e-06, "loss": 0.1438, "step": 5227 }, { "epoch": 1.43, "grad_norm": 1.994350547631619, "learning_rate": 5.634180436036836e-06, "loss": 0.1232, "step": 5228 }, { "epoch": 1.43, "grad_norm": 1.8041519762508742, "learning_rate": 5.63271862965069e-06, "loss": 0.0873, "step": 5229 }, { "epoch": 1.43, "grad_norm": 1.7876646858536376, "learning_rate": 5.6312567683006565e-06, "loss": 0.1044, "step": 5230 }, { "epoch": 1.43, "grad_norm": 2.3974761630972368, "learning_rate": 5.629794852113729e-06, "loss": 0.1209, "step": 5231 }, { "epoch": 1.43, "grad_norm": 2.1172445704772316, "learning_rate": 5.628332881216899e-06, "loss": 0.14, "step": 5232 }, { "epoch": 1.43, "grad_norm": 2.010250280278537, "learning_rate": 5.6268708557371695e-06, "loss": 0.1308, "step": 5233 }, { "epoch": 1.43, "grad_norm": 6.929899743094044, "learning_rate": 5.625408775801546e-06, "loss": 0.1851, "step": 5234 }, { "epoch": 1.43, "grad_norm": 1.700682830707001, "learning_rate": 5.623946641537038e-06, "loss": 0.1181, "step": 5235 }, { "epoch": 1.43, "grad_norm": 1.726260156262408, "learning_rate": 5.622484453070659e-06, "loss": 0.1188, "step": 5236 }, { "epoch": 1.43, "grad_norm": 1.7428440268994987, "learning_rate": 5.621022210529431e-06, "loss": 0.107, "step": 5237 }, { "epoch": 1.43, "grad_norm": 1.9425193704864747, "learning_rate": 5.619559914040376e-06, "loss": 0.1377, "step": 5238 }, { "epoch": 1.43, "grad_norm": 2.132174108983779, "learning_rate": 5.618097563730522e-06, "loss": 0.1358, "step": 5239 }, { "epoch": 1.43, "grad_norm": 2.026909821081086, "learning_rate": 5.616635159726907e-06, "loss": 0.1312, "step": 5240 }, { "epoch": 1.43, "grad_norm": 1.7817257757433718, "learning_rate": 5.615172702156564e-06, "loss": 0.1109, "step": 5241 }, { "epoch": 1.43, "grad_norm": 1.9056466491720712, "learning_rate": 5.613710191146539e-06, "loss": 0.1362, "step": 5242 }, { "epoch": 1.43, "grad_norm": 1.8344045572626833, "learning_rate": 5.612247626823878e-06, "loss": 0.1206, "step": 5243 }, { "epoch": 1.43, "grad_norm": 1.8614398371496157, "learning_rate": 5.610785009315633e-06, "loss": 0.1296, "step": 5244 }, { "epoch": 1.43, "grad_norm": 1.9759860983485649, "learning_rate": 5.609322338748861e-06, "loss": 0.1252, "step": 5245 }, { "epoch": 1.43, "grad_norm": 1.9552709724238888, "learning_rate": 5.607859615250626e-06, "loss": 0.1267, "step": 5246 }, { "epoch": 1.43, "grad_norm": 1.6787095350622503, "learning_rate": 5.606396838947988e-06, "loss": 0.1035, "step": 5247 }, { "epoch": 1.43, "grad_norm": 1.7447287687220425, "learning_rate": 5.604934009968023e-06, "loss": 0.1035, "step": 5248 }, { "epoch": 1.43, "grad_norm": 1.638886627808892, "learning_rate": 5.603471128437804e-06, "loss": 0.1046, "step": 5249 }, { "epoch": 1.43, "grad_norm": 1.8234028366458395, "learning_rate": 5.60200819448441e-06, "loss": 0.0954, "step": 5250 }, { "epoch": 1.43, "grad_norm": 2.1610716059279964, "learning_rate": 5.600545208234927e-06, "loss": 0.1385, "step": 5251 }, { "epoch": 1.43, "grad_norm": 1.9966944765756987, "learning_rate": 5.599082169816441e-06, "loss": 0.12, "step": 5252 }, { "epoch": 1.43, "grad_norm": 2.310644494404322, "learning_rate": 5.597619079356047e-06, "loss": 0.1393, "step": 5253 }, { "epoch": 1.43, "grad_norm": 1.7647891542698966, "learning_rate": 5.596155936980844e-06, "loss": 0.1011, "step": 5254 }, { "epoch": 1.43, "grad_norm": 1.9427808823891612, "learning_rate": 5.594692742817932e-06, "loss": 0.1182, "step": 5255 }, { "epoch": 1.43, "grad_norm": 1.8641421740813948, "learning_rate": 5.593229496994419e-06, "loss": 0.1102, "step": 5256 }, { "epoch": 1.44, "grad_norm": 1.9689183204187046, "learning_rate": 5.5917661996374155e-06, "loss": 0.0946, "step": 5257 }, { "epoch": 1.44, "grad_norm": 1.7226767739737063, "learning_rate": 5.5903028508740385e-06, "loss": 0.1103, "step": 5258 }, { "epoch": 1.44, "grad_norm": 2.2352050278918365, "learning_rate": 5.588839450831407e-06, "loss": 0.1302, "step": 5259 }, { "epoch": 1.44, "grad_norm": 1.9616214256530697, "learning_rate": 5.587375999636645e-06, "loss": 0.1342, "step": 5260 }, { "epoch": 1.44, "grad_norm": 1.997698391433047, "learning_rate": 5.585912497416885e-06, "loss": 0.1106, "step": 5261 }, { "epoch": 1.44, "grad_norm": 2.2240891896275716, "learning_rate": 5.5844489442992575e-06, "loss": 0.1424, "step": 5262 }, { "epoch": 1.44, "grad_norm": 1.6917312742149904, "learning_rate": 5.582985340410901e-06, "loss": 0.0982, "step": 5263 }, { "epoch": 1.44, "grad_norm": 2.208501316386937, "learning_rate": 5.581521685878959e-06, "loss": 0.1304, "step": 5264 }, { "epoch": 1.44, "grad_norm": 1.6825963992276745, "learning_rate": 5.5800579808305766e-06, "loss": 0.1036, "step": 5265 }, { "epoch": 1.44, "grad_norm": 1.6003004788966047, "learning_rate": 5.578594225392906e-06, "loss": 0.1045, "step": 5266 }, { "epoch": 1.44, "grad_norm": 1.925600542779073, "learning_rate": 5.577130419693104e-06, "loss": 0.125, "step": 5267 }, { "epoch": 1.44, "grad_norm": 2.0109714005885895, "learning_rate": 5.575666563858329e-06, "loss": 0.1156, "step": 5268 }, { "epoch": 1.44, "grad_norm": 1.8871176870291024, "learning_rate": 5.574202658015744e-06, "loss": 0.1179, "step": 5269 }, { "epoch": 1.44, "grad_norm": 1.916120523130814, "learning_rate": 5.57273870229252e-06, "loss": 0.0988, "step": 5270 }, { "epoch": 1.44, "grad_norm": 2.018212891320888, "learning_rate": 5.571274696815828e-06, "loss": 0.1385, "step": 5271 }, { "epoch": 1.44, "grad_norm": 1.7706841540620224, "learning_rate": 5.569810641712847e-06, "loss": 0.1225, "step": 5272 }, { "epoch": 1.44, "grad_norm": 2.0651736163841328, "learning_rate": 5.568346537110759e-06, "loss": 0.135, "step": 5273 }, { "epoch": 1.44, "grad_norm": 1.6153609832755442, "learning_rate": 5.566882383136748e-06, "loss": 0.1077, "step": 5274 }, { "epoch": 1.44, "grad_norm": 1.6231648979903837, "learning_rate": 5.565418179918004e-06, "loss": 0.105, "step": 5275 }, { "epoch": 1.44, "grad_norm": 2.286318659616542, "learning_rate": 5.563953927581724e-06, "loss": 0.1395, "step": 5276 }, { "epoch": 1.44, "grad_norm": 1.8072308724091177, "learning_rate": 5.562489626255104e-06, "loss": 0.1174, "step": 5277 }, { "epoch": 1.44, "grad_norm": 2.0271338722833763, "learning_rate": 5.561025276065348e-06, "loss": 0.1391, "step": 5278 }, { "epoch": 1.44, "grad_norm": 1.9143869577044594, "learning_rate": 5.559560877139665e-06, "loss": 0.1189, "step": 5279 }, { "epoch": 1.44, "grad_norm": 1.732693333924942, "learning_rate": 5.558096429605263e-06, "loss": 0.113, "step": 5280 }, { "epoch": 1.44, "grad_norm": 1.8761674973781715, "learning_rate": 5.5566319335893604e-06, "loss": 0.1131, "step": 5281 }, { "epoch": 1.44, "grad_norm": 1.6265334383371182, "learning_rate": 5.555167389219176e-06, "loss": 0.1137, "step": 5282 }, { "epoch": 1.44, "grad_norm": 1.7019982464878458, "learning_rate": 5.553702796621933e-06, "loss": 0.0989, "step": 5283 }, { "epoch": 1.44, "grad_norm": 1.90156828208498, "learning_rate": 5.552238155924861e-06, "loss": 0.1188, "step": 5284 }, { "epoch": 1.44, "grad_norm": 1.9826773284013783, "learning_rate": 5.550773467255195e-06, "loss": 0.1182, "step": 5285 }, { "epoch": 1.44, "grad_norm": 2.076557665138287, "learning_rate": 5.549308730740166e-06, "loss": 0.1295, "step": 5286 }, { "epoch": 1.44, "grad_norm": 2.1533821835296334, "learning_rate": 5.5478439465070174e-06, "loss": 0.153, "step": 5287 }, { "epoch": 1.44, "grad_norm": 1.7381585741314176, "learning_rate": 5.546379114682996e-06, "loss": 0.1066, "step": 5288 }, { "epoch": 1.44, "grad_norm": 1.6635927313462626, "learning_rate": 5.544914235395347e-06, "loss": 0.1073, "step": 5289 }, { "epoch": 1.44, "grad_norm": 1.781751405359924, "learning_rate": 5.543449308771328e-06, "loss": 0.1206, "step": 5290 }, { "epoch": 1.44, "grad_norm": 1.971139103329158, "learning_rate": 5.541984334938193e-06, "loss": 0.1346, "step": 5291 }, { "epoch": 1.44, "grad_norm": 1.6186839785683387, "learning_rate": 5.540519314023204e-06, "loss": 0.0929, "step": 5292 }, { "epoch": 1.44, "grad_norm": 2.1385548062551716, "learning_rate": 5.5390542461536275e-06, "loss": 0.1564, "step": 5293 }, { "epoch": 1.45, "grad_norm": 1.7201549108815215, "learning_rate": 5.5375891314567335e-06, "loss": 0.1245, "step": 5294 }, { "epoch": 1.45, "grad_norm": 1.822533325792133, "learning_rate": 5.536123970059793e-06, "loss": 0.1236, "step": 5295 }, { "epoch": 1.45, "grad_norm": 1.9278138382341734, "learning_rate": 5.534658762090087e-06, "loss": 0.1208, "step": 5296 }, { "epoch": 1.45, "grad_norm": 1.5521411642870067, "learning_rate": 5.533193507674895e-06, "loss": 0.0936, "step": 5297 }, { "epoch": 1.45, "grad_norm": 1.8004949538420973, "learning_rate": 5.531728206941502e-06, "loss": 0.0979, "step": 5298 }, { "epoch": 1.45, "grad_norm": 1.5087233539099076, "learning_rate": 5.5302628600172005e-06, "loss": 0.0988, "step": 5299 }, { "epoch": 1.45, "grad_norm": 1.6392810666795463, "learning_rate": 5.5287974670292825e-06, "loss": 0.0997, "step": 5300 }, { "epoch": 1.45, "grad_norm": 2.246531209545475, "learning_rate": 5.527332028105046e-06, "loss": 0.1441, "step": 5301 }, { "epoch": 1.45, "grad_norm": 1.5906231938863176, "learning_rate": 5.525866543371794e-06, "loss": 0.1063, "step": 5302 }, { "epoch": 1.45, "grad_norm": 2.641074152783996, "learning_rate": 5.5244010129568294e-06, "loss": 0.1252, "step": 5303 }, { "epoch": 1.45, "grad_norm": 2.096881893280961, "learning_rate": 5.522935436987465e-06, "loss": 0.1363, "step": 5304 }, { "epoch": 1.45, "grad_norm": 1.7027707552441753, "learning_rate": 5.521469815591014e-06, "loss": 0.1063, "step": 5305 }, { "epoch": 1.45, "grad_norm": 1.9685022450361709, "learning_rate": 5.520004148894793e-06, "loss": 0.1286, "step": 5306 }, { "epoch": 1.45, "grad_norm": 1.7145771958239175, "learning_rate": 5.518538437026123e-06, "loss": 0.1123, "step": 5307 }, { "epoch": 1.45, "grad_norm": 1.8889040155802865, "learning_rate": 5.517072680112332e-06, "loss": 0.1133, "step": 5308 }, { "epoch": 1.45, "grad_norm": 1.8396074353811016, "learning_rate": 5.515606878280747e-06, "loss": 0.1337, "step": 5309 }, { "epoch": 1.45, "grad_norm": 1.8626200442685266, "learning_rate": 5.514141031658703e-06, "loss": 0.1292, "step": 5310 }, { "epoch": 1.45, "grad_norm": 1.749966327344818, "learning_rate": 5.512675140373537e-06, "loss": 0.1161, "step": 5311 }, { "epoch": 1.45, "grad_norm": 1.9994239729956802, "learning_rate": 5.511209204552588e-06, "loss": 0.139, "step": 5312 }, { "epoch": 1.45, "grad_norm": 1.81852097240687, "learning_rate": 5.509743224323203e-06, "loss": 0.1128, "step": 5313 }, { "epoch": 1.45, "grad_norm": 1.6591640013897353, "learning_rate": 5.508277199812732e-06, "loss": 0.1029, "step": 5314 }, { "epoch": 1.45, "grad_norm": 1.8647796237086316, "learning_rate": 5.506811131148524e-06, "loss": 0.119, "step": 5315 }, { "epoch": 1.45, "grad_norm": 1.924104771325096, "learning_rate": 5.5053450184579374e-06, "loss": 0.0997, "step": 5316 }, { "epoch": 1.45, "grad_norm": 1.821136979857864, "learning_rate": 5.5038788618683335e-06, "loss": 0.1199, "step": 5317 }, { "epoch": 1.45, "grad_norm": 1.6883172787387781, "learning_rate": 5.502412661507076e-06, "loss": 0.1038, "step": 5318 }, { "epoch": 1.45, "grad_norm": 1.7017783469327037, "learning_rate": 5.500946417501532e-06, "loss": 0.1075, "step": 5319 }, { "epoch": 1.45, "grad_norm": 1.9339275791805233, "learning_rate": 5.499480129979073e-06, "loss": 0.1171, "step": 5320 }, { "epoch": 1.45, "grad_norm": 1.9950668374318195, "learning_rate": 5.498013799067077e-06, "loss": 0.1181, "step": 5321 }, { "epoch": 1.45, "grad_norm": 2.0864399431821004, "learning_rate": 5.49654742489292e-06, "loss": 0.1301, "step": 5322 }, { "epoch": 1.45, "grad_norm": 2.0548946930359393, "learning_rate": 5.495081007583986e-06, "loss": 0.1422, "step": 5323 }, { "epoch": 1.45, "grad_norm": 1.746566338432751, "learning_rate": 5.493614547267664e-06, "loss": 0.1039, "step": 5324 }, { "epoch": 1.45, "grad_norm": 1.6622084833057937, "learning_rate": 5.492148044071342e-06, "loss": 0.0979, "step": 5325 }, { "epoch": 1.45, "grad_norm": 1.6182923331828287, "learning_rate": 5.490681498122415e-06, "loss": 0.1179, "step": 5326 }, { "epoch": 1.45, "grad_norm": 2.2117874882886666, "learning_rate": 5.4892149095482815e-06, "loss": 0.1319, "step": 5327 }, { "epoch": 1.45, "grad_norm": 1.9514931195335927, "learning_rate": 5.487748278476342e-06, "loss": 0.1075, "step": 5328 }, { "epoch": 1.45, "grad_norm": 2.0211051629681203, "learning_rate": 5.486281605034004e-06, "loss": 0.1118, "step": 5329 }, { "epoch": 1.46, "grad_norm": 1.749586553830844, "learning_rate": 5.484814889348673e-06, "loss": 0.117, "step": 5330 }, { "epoch": 1.46, "grad_norm": 1.7387106373355585, "learning_rate": 5.483348131547765e-06, "loss": 0.1102, "step": 5331 }, { "epoch": 1.46, "grad_norm": 1.7869853654007763, "learning_rate": 5.481881331758696e-06, "loss": 0.119, "step": 5332 }, { "epoch": 1.46, "grad_norm": 1.713687749713591, "learning_rate": 5.480414490108884e-06, "loss": 0.1046, "step": 5333 }, { "epoch": 1.46, "grad_norm": 2.0489839934274596, "learning_rate": 5.478947606725754e-06, "loss": 0.1246, "step": 5334 }, { "epoch": 1.46, "grad_norm": 2.1280105722323204, "learning_rate": 5.477480681736734e-06, "loss": 0.1373, "step": 5335 }, { "epoch": 1.46, "grad_norm": 1.715127355121317, "learning_rate": 5.476013715269254e-06, "loss": 0.1147, "step": 5336 }, { "epoch": 1.46, "grad_norm": 1.8742772229921738, "learning_rate": 5.474546707450748e-06, "loss": 0.1275, "step": 5337 }, { "epoch": 1.46, "grad_norm": 1.7982285423370132, "learning_rate": 5.473079658408655e-06, "loss": 0.1172, "step": 5338 }, { "epoch": 1.46, "grad_norm": 1.900943795778086, "learning_rate": 5.471612568270415e-06, "loss": 0.1042, "step": 5339 }, { "epoch": 1.46, "grad_norm": 1.7097739134935468, "learning_rate": 5.4701454371634756e-06, "loss": 0.0953, "step": 5340 }, { "epoch": 1.46, "grad_norm": 1.6460378318747704, "learning_rate": 5.468678265215286e-06, "loss": 0.0935, "step": 5341 }, { "epoch": 1.46, "grad_norm": 1.8542532618124885, "learning_rate": 5.467211052553295e-06, "loss": 0.1197, "step": 5342 }, { "epoch": 1.46, "grad_norm": 1.6895578666012736, "learning_rate": 5.465743799304961e-06, "loss": 0.1084, "step": 5343 }, { "epoch": 1.46, "grad_norm": 1.6244813519278594, "learning_rate": 5.464276505597743e-06, "loss": 0.0952, "step": 5344 }, { "epoch": 1.46, "grad_norm": 1.7419565845525498, "learning_rate": 5.462809171559104e-06, "loss": 0.097, "step": 5345 }, { "epoch": 1.46, "grad_norm": 2.203499387502144, "learning_rate": 5.46134179731651e-06, "loss": 0.1267, "step": 5346 }, { "epoch": 1.46, "grad_norm": 1.5798623980431044, "learning_rate": 5.4598743829974334e-06, "loss": 0.0944, "step": 5347 }, { "epoch": 1.46, "grad_norm": 1.907882318608598, "learning_rate": 5.458406928729343e-06, "loss": 0.1287, "step": 5348 }, { "epoch": 1.46, "grad_norm": 1.8825765376062369, "learning_rate": 5.456939434639719e-06, "loss": 0.1122, "step": 5349 }, { "epoch": 1.46, "grad_norm": 1.9507807590122432, "learning_rate": 5.455471900856041e-06, "loss": 0.1251, "step": 5350 }, { "epoch": 1.46, "grad_norm": 2.038331018661256, "learning_rate": 5.454004327505792e-06, "loss": 0.1266, "step": 5351 }, { "epoch": 1.46, "grad_norm": 1.8049492614449725, "learning_rate": 5.45253671471646e-06, "loss": 0.1193, "step": 5352 }, { "epoch": 1.46, "grad_norm": 1.5526121393090817, "learning_rate": 5.451069062615536e-06, "loss": 0.0997, "step": 5353 }, { "epoch": 1.46, "grad_norm": 1.9678547710699303, "learning_rate": 5.4496013713305126e-06, "loss": 0.1216, "step": 5354 }, { "epoch": 1.46, "grad_norm": 2.141050783221842, "learning_rate": 5.4481336409888886e-06, "loss": 0.1249, "step": 5355 }, { "epoch": 1.46, "grad_norm": 1.7455599854222645, "learning_rate": 5.446665871718166e-06, "loss": 0.117, "step": 5356 }, { "epoch": 1.46, "grad_norm": 1.7707706774333558, "learning_rate": 5.445198063645844e-06, "loss": 0.1124, "step": 5357 }, { "epoch": 1.46, "grad_norm": 1.613915861689703, "learning_rate": 5.443730216899437e-06, "loss": 0.0971, "step": 5358 }, { "epoch": 1.46, "grad_norm": 1.9841003393937986, "learning_rate": 5.442262331606451e-06, "loss": 0.1303, "step": 5359 }, { "epoch": 1.46, "grad_norm": 1.9577249804294283, "learning_rate": 5.440794407894403e-06, "loss": 0.1195, "step": 5360 }, { "epoch": 1.46, "grad_norm": 1.9162297061922315, "learning_rate": 5.439326445890808e-06, "loss": 0.1437, "step": 5361 }, { "epoch": 1.46, "grad_norm": 1.50385774696173, "learning_rate": 5.437858445723191e-06, "loss": 0.09, "step": 5362 }, { "epoch": 1.46, "grad_norm": 1.811790801267793, "learning_rate": 5.436390407519072e-06, "loss": 0.1179, "step": 5363 }, { "epoch": 1.46, "grad_norm": 2.139819314232434, "learning_rate": 5.43492233140598e-06, "loss": 0.1269, "step": 5364 }, { "epoch": 1.46, "grad_norm": 1.6323557311836059, "learning_rate": 5.4334542175114495e-06, "loss": 0.0953, "step": 5365 }, { "epoch": 1.46, "grad_norm": 1.8822564660148549, "learning_rate": 5.431986065963008e-06, "loss": 0.1268, "step": 5366 }, { "epoch": 1.47, "grad_norm": 1.8287951562105682, "learning_rate": 5.430517876888199e-06, "loss": 0.1111, "step": 5367 }, { "epoch": 1.47, "grad_norm": 1.9578642108356705, "learning_rate": 5.4290496504145595e-06, "loss": 0.1379, "step": 5368 }, { "epoch": 1.47, "grad_norm": 1.8105815968441237, "learning_rate": 5.427581386669635e-06, "loss": 0.1196, "step": 5369 }, { "epoch": 1.47, "grad_norm": 1.7774140981727296, "learning_rate": 5.426113085780971e-06, "loss": 0.1085, "step": 5370 }, { "epoch": 1.47, "grad_norm": 1.9187038356090786, "learning_rate": 5.424644747876121e-06, "loss": 0.117, "step": 5371 }, { "epoch": 1.47, "grad_norm": 2.0099795512932945, "learning_rate": 5.423176373082636e-06, "loss": 0.093, "step": 5372 }, { "epoch": 1.47, "grad_norm": 1.8668897617992943, "learning_rate": 5.421707961528073e-06, "loss": 0.1123, "step": 5373 }, { "epoch": 1.47, "grad_norm": 1.8400590378911752, "learning_rate": 5.4202395133399955e-06, "loss": 0.1244, "step": 5374 }, { "epoch": 1.47, "grad_norm": 1.9791363922410767, "learning_rate": 5.418771028645962e-06, "loss": 0.117, "step": 5375 }, { "epoch": 1.47, "grad_norm": 1.7833718931959492, "learning_rate": 5.41730250757354e-06, "loss": 0.1096, "step": 5376 }, { "epoch": 1.47, "grad_norm": 1.8667183246935424, "learning_rate": 5.415833950250302e-06, "loss": 0.1197, "step": 5377 }, { "epoch": 1.47, "grad_norm": 1.941056875846269, "learning_rate": 5.414365356803817e-06, "loss": 0.1238, "step": 5378 }, { "epoch": 1.47, "grad_norm": 1.6231093106127832, "learning_rate": 5.412896727361663e-06, "loss": 0.0989, "step": 5379 }, { "epoch": 1.47, "grad_norm": 1.622461326398856, "learning_rate": 5.411428062051418e-06, "loss": 0.099, "step": 5380 }, { "epoch": 1.47, "grad_norm": 1.6973197504775575, "learning_rate": 5.409959361000665e-06, "loss": 0.1103, "step": 5381 }, { "epoch": 1.47, "grad_norm": 2.036626134080738, "learning_rate": 5.408490624336987e-06, "loss": 0.124, "step": 5382 }, { "epoch": 1.47, "grad_norm": 2.0390093464123584, "learning_rate": 5.407021852187976e-06, "loss": 0.132, "step": 5383 }, { "epoch": 1.47, "grad_norm": 1.8688049493497199, "learning_rate": 5.40555304468122e-06, "loss": 0.1087, "step": 5384 }, { "epoch": 1.47, "grad_norm": 1.7657774746978248, "learning_rate": 5.404084201944315e-06, "loss": 0.1167, "step": 5385 }, { "epoch": 1.47, "grad_norm": 1.667686551566891, "learning_rate": 5.402615324104858e-06, "loss": 0.0977, "step": 5386 }, { "epoch": 1.47, "grad_norm": 1.8424574401429858, "learning_rate": 5.40114641129045e-06, "loss": 0.1233, "step": 5387 }, { "epoch": 1.47, "grad_norm": 1.9061416063010608, "learning_rate": 5.399677463628695e-06, "loss": 0.1194, "step": 5388 }, { "epoch": 1.47, "grad_norm": 1.6374474404333954, "learning_rate": 5.398208481247198e-06, "loss": 0.1071, "step": 5389 }, { "epoch": 1.47, "grad_norm": 2.0099240870564454, "learning_rate": 5.396739464273569e-06, "loss": 0.1207, "step": 5390 }, { "epoch": 1.47, "grad_norm": 1.938987331654749, "learning_rate": 5.395270412835423e-06, "loss": 0.1181, "step": 5391 }, { "epoch": 1.47, "grad_norm": 1.9258258082537223, "learning_rate": 5.393801327060372e-06, "loss": 0.1225, "step": 5392 }, { "epoch": 1.47, "grad_norm": 1.9710305906378367, "learning_rate": 5.392332207076036e-06, "loss": 0.1317, "step": 5393 }, { "epoch": 1.47, "grad_norm": 2.1308237195391513, "learning_rate": 5.390863053010038e-06, "loss": 0.1345, "step": 5394 }, { "epoch": 1.47, "grad_norm": 1.8339198457387127, "learning_rate": 5.389393864990001e-06, "loss": 0.1229, "step": 5395 }, { "epoch": 1.47, "grad_norm": 2.0524854673575383, "learning_rate": 5.387924643143553e-06, "loss": 0.1245, "step": 5396 }, { "epoch": 1.47, "grad_norm": 1.9154452065107503, "learning_rate": 5.386455387598325e-06, "loss": 0.1235, "step": 5397 }, { "epoch": 1.47, "grad_norm": 2.0372973714155407, "learning_rate": 5.384986098481948e-06, "loss": 0.12, "step": 5398 }, { "epoch": 1.47, "grad_norm": 1.870346753126263, "learning_rate": 5.383516775922061e-06, "loss": 0.1203, "step": 5399 }, { "epoch": 1.47, "grad_norm": 1.8854025881316814, "learning_rate": 5.382047420046302e-06, "loss": 0.115, "step": 5400 }, { "epoch": 1.47, "grad_norm": 1.768386777215909, "learning_rate": 5.380578030982313e-06, "loss": 0.1232, "step": 5401 }, { "epoch": 1.47, "grad_norm": 1.8956770024484721, "learning_rate": 5.379108608857739e-06, "loss": 0.1187, "step": 5402 }, { "epoch": 1.48, "grad_norm": 2.230955789474564, "learning_rate": 5.377639153800229e-06, "loss": 0.1317, "step": 5403 }, { "epoch": 1.48, "grad_norm": 1.8160872160067627, "learning_rate": 5.3761696659374315e-06, "loss": 0.1016, "step": 5404 }, { "epoch": 1.48, "grad_norm": 1.8644833216940295, "learning_rate": 5.3747001453970005e-06, "loss": 0.1218, "step": 5405 }, { "epoch": 1.48, "grad_norm": 1.8640313511085593, "learning_rate": 5.373230592306595e-06, "loss": 0.1299, "step": 5406 }, { "epoch": 1.48, "grad_norm": 1.7137013584991465, "learning_rate": 5.371761006793871e-06, "loss": 0.1005, "step": 5407 }, { "epoch": 1.48, "grad_norm": 1.9408380939281658, "learning_rate": 5.370291388986491e-06, "loss": 0.1246, "step": 5408 }, { "epoch": 1.48, "grad_norm": 1.9167641967436153, "learning_rate": 5.368821739012122e-06, "loss": 0.1259, "step": 5409 }, { "epoch": 1.48, "grad_norm": 1.6288732396568644, "learning_rate": 5.367352056998429e-06, "loss": 0.0982, "step": 5410 }, { "epoch": 1.48, "grad_norm": 1.9667016016364929, "learning_rate": 5.3658823430730834e-06, "loss": 0.1351, "step": 5411 }, { "epoch": 1.48, "grad_norm": 1.8630565890108357, "learning_rate": 5.36441259736376e-06, "loss": 0.1145, "step": 5412 }, { "epoch": 1.48, "grad_norm": 2.051144487439951, "learning_rate": 5.362942819998131e-06, "loss": 0.1268, "step": 5413 }, { "epoch": 1.48, "grad_norm": 1.9013794972576763, "learning_rate": 5.361473011103879e-06, "loss": 0.1059, "step": 5414 }, { "epoch": 1.48, "grad_norm": 1.9188026307920527, "learning_rate": 5.360003170808684e-06, "loss": 0.1276, "step": 5415 }, { "epoch": 1.48, "grad_norm": 1.9047557059065274, "learning_rate": 5.358533299240228e-06, "loss": 0.1279, "step": 5416 }, { "epoch": 1.48, "grad_norm": 1.8222789137510702, "learning_rate": 5.357063396526201e-06, "loss": 0.1086, "step": 5417 }, { "epoch": 1.48, "grad_norm": 1.9020706774030296, "learning_rate": 5.355593462794292e-06, "loss": 0.1197, "step": 5418 }, { "epoch": 1.48, "grad_norm": 2.075733079654561, "learning_rate": 5.354123498172191e-06, "loss": 0.1251, "step": 5419 }, { "epoch": 1.48, "grad_norm": 1.8623482595294856, "learning_rate": 5.352653502787595e-06, "loss": 0.1105, "step": 5420 }, { "epoch": 1.48, "grad_norm": 1.8098337804556652, "learning_rate": 5.351183476768202e-06, "loss": 0.1003, "step": 5421 }, { "epoch": 1.48, "grad_norm": 1.8494545752735825, "learning_rate": 5.34971342024171e-06, "loss": 0.1258, "step": 5422 }, { "epoch": 1.48, "grad_norm": 1.8290360296814934, "learning_rate": 5.348243333335823e-06, "loss": 0.1254, "step": 5423 }, { "epoch": 1.48, "grad_norm": 2.043789887935686, "learning_rate": 5.346773216178248e-06, "loss": 0.1078, "step": 5424 }, { "epoch": 1.48, "grad_norm": 2.0542332837201034, "learning_rate": 5.345303068896692e-06, "loss": 0.1328, "step": 5425 }, { "epoch": 1.48, "grad_norm": 1.8579128298271494, "learning_rate": 5.3438328916188655e-06, "loss": 0.113, "step": 5426 }, { "epoch": 1.48, "grad_norm": 2.0727797046882337, "learning_rate": 5.342362684472483e-06, "loss": 0.1279, "step": 5427 }, { "epoch": 1.48, "grad_norm": 1.9803406155549197, "learning_rate": 5.3408924475852585e-06, "loss": 0.1353, "step": 5428 }, { "epoch": 1.48, "grad_norm": 1.5916085315665354, "learning_rate": 5.3394221810849125e-06, "loss": 0.0966, "step": 5429 }, { "epoch": 1.48, "grad_norm": 1.7014633358515945, "learning_rate": 5.337951885099167e-06, "loss": 0.1052, "step": 5430 }, { "epoch": 1.48, "grad_norm": 1.8184008827614397, "learning_rate": 5.336481559755742e-06, "loss": 0.1032, "step": 5431 }, { "epoch": 1.48, "grad_norm": 1.6992542137518225, "learning_rate": 5.335011205182366e-06, "loss": 0.1022, "step": 5432 }, { "epoch": 1.48, "grad_norm": 2.002014863379674, "learning_rate": 5.33354082150677e-06, "loss": 0.1258, "step": 5433 }, { "epoch": 1.48, "grad_norm": 1.8476666819000511, "learning_rate": 5.332070408856681e-06, "loss": 0.1279, "step": 5434 }, { "epoch": 1.48, "grad_norm": 2.106112551655337, "learning_rate": 5.330599967359836e-06, "loss": 0.1267, "step": 5435 }, { "epoch": 1.48, "grad_norm": 1.9703838168497059, "learning_rate": 5.329129497143971e-06, "loss": 0.1281, "step": 5436 }, { "epoch": 1.48, "grad_norm": 2.016946601179282, "learning_rate": 5.327658998336825e-06, "loss": 0.1389, "step": 5437 }, { "epoch": 1.48, "grad_norm": 2.42039575408107, "learning_rate": 5.326188471066136e-06, "loss": 0.1169, "step": 5438 }, { "epoch": 1.48, "grad_norm": 1.7268366868352651, "learning_rate": 5.3247179154596525e-06, "loss": 0.1164, "step": 5439 }, { "epoch": 1.49, "grad_norm": 1.7369725057881076, "learning_rate": 5.323247331645118e-06, "loss": 0.0978, "step": 5440 }, { "epoch": 1.49, "grad_norm": 1.6167532800095725, "learning_rate": 5.321776719750283e-06, "loss": 0.1004, "step": 5441 }, { "epoch": 1.49, "grad_norm": 2.0935810634785885, "learning_rate": 5.3203060799028976e-06, "loss": 0.1256, "step": 5442 }, { "epoch": 1.49, "grad_norm": 1.8103419327591084, "learning_rate": 5.318835412230714e-06, "loss": 0.1025, "step": 5443 }, { "epoch": 1.49, "grad_norm": 1.957669970356974, "learning_rate": 5.3173647168614906e-06, "loss": 0.1043, "step": 5444 }, { "epoch": 1.49, "grad_norm": 2.185145221144955, "learning_rate": 5.3158939939229855e-06, "loss": 0.1231, "step": 5445 }, { "epoch": 1.49, "grad_norm": 1.5721845787137647, "learning_rate": 5.314423243542959e-06, "loss": 0.0971, "step": 5446 }, { "epoch": 1.49, "grad_norm": 2.0181474568045314, "learning_rate": 5.312952465849173e-06, "loss": 0.1081, "step": 5447 }, { "epoch": 1.49, "grad_norm": 1.8344109746025858, "learning_rate": 5.311481660969395e-06, "loss": 0.1107, "step": 5448 }, { "epoch": 1.49, "grad_norm": 2.0499702113211424, "learning_rate": 5.310010829031392e-06, "loss": 0.1245, "step": 5449 }, { "epoch": 1.49, "grad_norm": 2.0650918523973703, "learning_rate": 5.3085399701629344e-06, "loss": 0.1311, "step": 5450 }, { "epoch": 1.49, "grad_norm": 2.111119967535554, "learning_rate": 5.307069084491797e-06, "loss": 0.1441, "step": 5451 }, { "epoch": 1.49, "grad_norm": 1.603316496380195, "learning_rate": 5.305598172145751e-06, "loss": 0.0938, "step": 5452 }, { "epoch": 1.49, "grad_norm": 1.8980223416976045, "learning_rate": 5.304127233252574e-06, "loss": 0.1207, "step": 5453 }, { "epoch": 1.49, "grad_norm": 1.7889506035417992, "learning_rate": 5.30265626794005e-06, "loss": 0.0929, "step": 5454 }, { "epoch": 1.49, "grad_norm": 1.8918028478029527, "learning_rate": 5.301185276335956e-06, "loss": 0.1127, "step": 5455 }, { "epoch": 1.49, "grad_norm": 2.2012681602529223, "learning_rate": 5.299714258568077e-06, "loss": 0.1196, "step": 5456 }, { "epoch": 1.49, "grad_norm": 1.8833081500806705, "learning_rate": 5.298243214764203e-06, "loss": 0.1195, "step": 5457 }, { "epoch": 1.49, "grad_norm": 1.8309389790151482, "learning_rate": 5.296772145052118e-06, "loss": 0.1077, "step": 5458 }, { "epoch": 1.49, "grad_norm": 1.8552252204011952, "learning_rate": 5.295301049559616e-06, "loss": 0.1059, "step": 5459 }, { "epoch": 1.49, "grad_norm": 2.2029322268580778, "learning_rate": 5.29382992841449e-06, "loss": 0.1223, "step": 5460 }, { "epoch": 1.49, "grad_norm": 1.886531701622489, "learning_rate": 5.292358781744533e-06, "loss": 0.1114, "step": 5461 }, { "epoch": 1.49, "grad_norm": 1.7201363791895972, "learning_rate": 5.290887609677545e-06, "loss": 0.1125, "step": 5462 }, { "epoch": 1.49, "grad_norm": 1.9765320171125889, "learning_rate": 5.289416412341326e-06, "loss": 0.121, "step": 5463 }, { "epoch": 1.49, "grad_norm": 1.9735494735066739, "learning_rate": 5.287945189863676e-06, "loss": 0.1185, "step": 5464 }, { "epoch": 1.49, "grad_norm": 1.7265716090888994, "learning_rate": 5.2864739423723996e-06, "loss": 0.1089, "step": 5465 }, { "epoch": 1.49, "grad_norm": 1.9063092883715924, "learning_rate": 5.285002669995306e-06, "loss": 0.1226, "step": 5466 }, { "epoch": 1.49, "grad_norm": 1.6350154751736292, "learning_rate": 5.283531372860201e-06, "loss": 0.1087, "step": 5467 }, { "epoch": 1.49, "grad_norm": 1.8433539037068998, "learning_rate": 5.282060051094895e-06, "loss": 0.1226, "step": 5468 }, { "epoch": 1.49, "grad_norm": 1.6937732976531295, "learning_rate": 5.2805887048272035e-06, "loss": 0.1138, "step": 5469 }, { "epoch": 1.49, "grad_norm": 1.8574343730451959, "learning_rate": 5.279117334184939e-06, "loss": 0.124, "step": 5470 }, { "epoch": 1.49, "grad_norm": 1.88357716621281, "learning_rate": 5.2776459392959186e-06, "loss": 0.1077, "step": 5471 }, { "epoch": 1.49, "grad_norm": 1.7460263466076074, "learning_rate": 5.2761745202879636e-06, "loss": 0.1146, "step": 5472 }, { "epoch": 1.49, "grad_norm": 2.13290242786307, "learning_rate": 5.274703077288893e-06, "loss": 0.115, "step": 5473 }, { "epoch": 1.49, "grad_norm": 2.077001707197766, "learning_rate": 5.27323161042653e-06, "loss": 0.1343, "step": 5474 }, { "epoch": 1.49, "grad_norm": 1.8596094435233093, "learning_rate": 5.271760119828703e-06, "loss": 0.1035, "step": 5475 }, { "epoch": 1.49, "grad_norm": 1.6517817795141545, "learning_rate": 5.270288605623237e-06, "loss": 0.1019, "step": 5476 }, { "epoch": 1.5, "grad_norm": 1.7913906566920585, "learning_rate": 5.268817067937962e-06, "loss": 0.1068, "step": 5477 }, { "epoch": 1.5, "grad_norm": 2.1806506240737864, "learning_rate": 5.26734550690071e-06, "loss": 0.1496, "step": 5478 }, { "epoch": 1.5, "grad_norm": 1.7354540816232031, "learning_rate": 5.265873922639315e-06, "loss": 0.1152, "step": 5479 }, { "epoch": 1.5, "grad_norm": 1.8956449065392988, "learning_rate": 5.264402315281613e-06, "loss": 0.1385, "step": 5480 }, { "epoch": 1.5, "grad_norm": 1.6082443965958646, "learning_rate": 5.262930684955439e-06, "loss": 0.0929, "step": 5481 }, { "epoch": 1.5, "grad_norm": 1.9332125092649675, "learning_rate": 5.261459031788634e-06, "loss": 0.1298, "step": 5482 }, { "epoch": 1.5, "grad_norm": 1.9033050135458704, "learning_rate": 5.259987355909042e-06, "loss": 0.1143, "step": 5483 }, { "epoch": 1.5, "grad_norm": 1.5923888783129834, "learning_rate": 5.258515657444503e-06, "loss": 0.1064, "step": 5484 }, { "epoch": 1.5, "grad_norm": 2.005318272274225, "learning_rate": 5.257043936522864e-06, "loss": 0.1352, "step": 5485 }, { "epoch": 1.5, "grad_norm": 2.182297560930908, "learning_rate": 5.255572193271974e-06, "loss": 0.1677, "step": 5486 }, { "epoch": 1.5, "grad_norm": 1.668251637478478, "learning_rate": 5.254100427819681e-06, "loss": 0.1114, "step": 5487 }, { "epoch": 1.5, "grad_norm": 1.8564126608541183, "learning_rate": 5.252628640293834e-06, "loss": 0.1114, "step": 5488 }, { "epoch": 1.5, "grad_norm": 1.7932579286837058, "learning_rate": 5.251156830822293e-06, "loss": 0.1048, "step": 5489 }, { "epoch": 1.5, "grad_norm": 1.9381573882463772, "learning_rate": 5.249684999532906e-06, "loss": 0.1361, "step": 5490 }, { "epoch": 1.5, "grad_norm": 1.833008376701025, "learning_rate": 5.248213146553533e-06, "loss": 0.1176, "step": 5491 }, { "epoch": 1.5, "grad_norm": 2.085573112663426, "learning_rate": 5.2467412720120345e-06, "loss": 0.1482, "step": 5492 }, { "epoch": 1.5, "grad_norm": 1.9076362078359177, "learning_rate": 5.245269376036269e-06, "loss": 0.1274, "step": 5493 }, { "epoch": 1.5, "grad_norm": 1.8303109961597201, "learning_rate": 5.2437974587540994e-06, "loss": 0.1137, "step": 5494 }, { "epoch": 1.5, "grad_norm": 2.6465210405881163, "learning_rate": 5.242325520293393e-06, "loss": 0.1069, "step": 5495 }, { "epoch": 1.5, "grad_norm": 1.9385391538450683, "learning_rate": 5.240853560782013e-06, "loss": 0.1228, "step": 5496 }, { "epoch": 1.5, "grad_norm": 1.831495159273908, "learning_rate": 5.23938158034783e-06, "loss": 0.1108, "step": 5497 }, { "epoch": 1.5, "grad_norm": 1.8415022217007568, "learning_rate": 5.237909579118713e-06, "loss": 0.1189, "step": 5498 }, { "epoch": 1.5, "grad_norm": 1.9894904444522798, "learning_rate": 5.236437557222533e-06, "loss": 0.1205, "step": 5499 }, { "epoch": 1.5, "grad_norm": 2.017582276453585, "learning_rate": 5.234965514787164e-06, "loss": 0.1208, "step": 5500 }, { "epoch": 1.5, "grad_norm": 1.5813129923391347, "learning_rate": 5.233493451940483e-06, "loss": 0.1044, "step": 5501 }, { "epoch": 1.5, "grad_norm": 1.9495441774044682, "learning_rate": 5.2320213688103645e-06, "loss": 0.12, "step": 5502 }, { "epoch": 1.5, "grad_norm": 1.680220736710512, "learning_rate": 5.230549265524689e-06, "loss": 0.0992, "step": 5503 }, { "epoch": 1.5, "grad_norm": 2.1525253997865135, "learning_rate": 5.22907714221134e-06, "loss": 0.1161, "step": 5504 }, { "epoch": 1.5, "grad_norm": 1.8645637232857368, "learning_rate": 5.227604998998195e-06, "loss": 0.1031, "step": 5505 }, { "epoch": 1.5, "grad_norm": 1.925527683425804, "learning_rate": 5.226132836013142e-06, "loss": 0.1254, "step": 5506 }, { "epoch": 1.5, "grad_norm": 1.7177162600787759, "learning_rate": 5.224660653384064e-06, "loss": 0.1086, "step": 5507 }, { "epoch": 1.5, "grad_norm": 2.084156999693669, "learning_rate": 5.2231884512388505e-06, "loss": 0.1308, "step": 5508 }, { "epoch": 1.5, "grad_norm": 1.9647063306905448, "learning_rate": 5.22171622970539e-06, "loss": 0.1243, "step": 5509 }, { "epoch": 1.5, "grad_norm": 1.782484955165553, "learning_rate": 5.2202439889115755e-06, "loss": 0.1112, "step": 5510 }, { "epoch": 1.5, "grad_norm": 2.439899298606176, "learning_rate": 5.218771728985296e-06, "loss": 0.1257, "step": 5511 }, { "epoch": 1.5, "grad_norm": 1.9452047451192829, "learning_rate": 5.2172994500544485e-06, "loss": 0.1273, "step": 5512 }, { "epoch": 1.51, "grad_norm": 1.5380729424952688, "learning_rate": 5.215827152246928e-06, "loss": 0.0768, "step": 5513 }, { "epoch": 1.51, "grad_norm": 1.911422107254962, "learning_rate": 5.2143548356906336e-06, "loss": 0.1196, "step": 5514 }, { "epoch": 1.51, "grad_norm": 2.0369349170237143, "learning_rate": 5.212882500513462e-06, "loss": 0.1254, "step": 5515 }, { "epoch": 1.51, "grad_norm": 1.8467128802293964, "learning_rate": 5.211410146843316e-06, "loss": 0.1147, "step": 5516 }, { "epoch": 1.51, "grad_norm": 2.0227455725619263, "learning_rate": 5.209937774808098e-06, "loss": 0.1192, "step": 5517 }, { "epoch": 1.51, "grad_norm": 1.8280557628629182, "learning_rate": 5.208465384535711e-06, "loss": 0.1172, "step": 5518 }, { "epoch": 1.51, "grad_norm": 1.943499247667336, "learning_rate": 5.206992976154063e-06, "loss": 0.1209, "step": 5519 }, { "epoch": 1.51, "grad_norm": 1.7525501474821539, "learning_rate": 5.205520549791058e-06, "loss": 0.1215, "step": 5520 }, { "epoch": 1.51, "grad_norm": 1.7005970932037453, "learning_rate": 5.204048105574606e-06, "loss": 0.1144, "step": 5521 }, { "epoch": 1.51, "grad_norm": 1.6496039410322916, "learning_rate": 5.202575643632619e-06, "loss": 0.1031, "step": 5522 }, { "epoch": 1.51, "grad_norm": 1.8243207406825588, "learning_rate": 5.201103164093007e-06, "loss": 0.1176, "step": 5523 }, { "epoch": 1.51, "grad_norm": 1.661748958656446, "learning_rate": 5.199630667083682e-06, "loss": 0.0996, "step": 5524 }, { "epoch": 1.51, "grad_norm": 1.6875505161867994, "learning_rate": 5.198158152732564e-06, "loss": 0.1202, "step": 5525 }, { "epoch": 1.51, "grad_norm": 2.07128548085433, "learning_rate": 5.196685621167564e-06, "loss": 0.1284, "step": 5526 }, { "epoch": 1.51, "grad_norm": 1.9612023158244825, "learning_rate": 5.195213072516603e-06, "loss": 0.1034, "step": 5527 }, { "epoch": 1.51, "grad_norm": 2.1650998447366296, "learning_rate": 5.193740506907601e-06, "loss": 0.145, "step": 5528 }, { "epoch": 1.51, "grad_norm": 1.8585114812108394, "learning_rate": 5.192267924468476e-06, "loss": 0.1177, "step": 5529 }, { "epoch": 1.51, "grad_norm": 1.5740653915234362, "learning_rate": 5.1907953253271514e-06, "loss": 0.0898, "step": 5530 }, { "epoch": 1.51, "grad_norm": 1.949270206733329, "learning_rate": 5.189322709611552e-06, "loss": 0.1107, "step": 5531 }, { "epoch": 1.51, "grad_norm": 1.7201998366613591, "learning_rate": 5.187850077449604e-06, "loss": 0.1215, "step": 5532 }, { "epoch": 1.51, "grad_norm": 1.8774995750030488, "learning_rate": 5.186377428969232e-06, "loss": 0.1236, "step": 5533 }, { "epoch": 1.51, "grad_norm": 1.9386935512614523, "learning_rate": 5.184904764298364e-06, "loss": 0.1205, "step": 5534 }, { "epoch": 1.51, "grad_norm": 1.9732850854733865, "learning_rate": 5.183432083564931e-06, "loss": 0.1245, "step": 5535 }, { "epoch": 1.51, "grad_norm": 1.766023149949048, "learning_rate": 5.181959386896862e-06, "loss": 0.121, "step": 5536 }, { "epoch": 1.51, "grad_norm": 1.8855184058671322, "learning_rate": 5.180486674422091e-06, "loss": 0.1235, "step": 5537 }, { "epoch": 1.51, "grad_norm": 2.0236063177403056, "learning_rate": 5.179013946268552e-06, "loss": 0.1154, "step": 5538 }, { "epoch": 1.51, "grad_norm": 2.106226598638917, "learning_rate": 5.177541202564177e-06, "loss": 0.1177, "step": 5539 }, { "epoch": 1.51, "grad_norm": 1.9068869324121809, "learning_rate": 5.176068443436907e-06, "loss": 0.1133, "step": 5540 }, { "epoch": 1.51, "grad_norm": 1.8342978640108614, "learning_rate": 5.174595669014675e-06, "loss": 0.113, "step": 5541 }, { "epoch": 1.51, "grad_norm": 1.6821753810546645, "learning_rate": 5.173122879425423e-06, "loss": 0.1109, "step": 5542 }, { "epoch": 1.51, "grad_norm": 2.0889860484782243, "learning_rate": 5.17165007479709e-06, "loss": 0.1212, "step": 5543 }, { "epoch": 1.51, "grad_norm": 1.8245016757926622, "learning_rate": 5.170177255257618e-06, "loss": 0.1125, "step": 5544 }, { "epoch": 1.51, "grad_norm": 2.027718152803371, "learning_rate": 5.16870442093495e-06, "loss": 0.1344, "step": 5545 }, { "epoch": 1.51, "grad_norm": 2.1516525393791275, "learning_rate": 5.167231571957032e-06, "loss": 0.1246, "step": 5546 }, { "epoch": 1.51, "grad_norm": 1.8753629460150982, "learning_rate": 5.165758708451807e-06, "loss": 0.1282, "step": 5547 }, { "epoch": 1.51, "grad_norm": 1.9724831638143476, "learning_rate": 5.164285830547221e-06, "loss": 0.1308, "step": 5548 }, { "epoch": 1.51, "grad_norm": 1.8003082765960625, "learning_rate": 5.162812938371226e-06, "loss": 0.1346, "step": 5549 }, { "epoch": 1.52, "grad_norm": 1.9449248516573328, "learning_rate": 5.161340032051767e-06, "loss": 0.1145, "step": 5550 }, { "epoch": 1.52, "grad_norm": 1.8266982634710451, "learning_rate": 5.159867111716797e-06, "loss": 0.1065, "step": 5551 }, { "epoch": 1.52, "grad_norm": 1.9620052554637786, "learning_rate": 5.158394177494268e-06, "loss": 0.1093, "step": 5552 }, { "epoch": 1.52, "grad_norm": 1.711501362078701, "learning_rate": 5.156921229512131e-06, "loss": 0.1113, "step": 5553 }, { "epoch": 1.52, "grad_norm": 1.828331898998082, "learning_rate": 5.15544826789834e-06, "loss": 0.1163, "step": 5554 }, { "epoch": 1.52, "grad_norm": 2.2031074109439657, "learning_rate": 5.153975292780852e-06, "loss": 0.1285, "step": 5555 }, { "epoch": 1.52, "grad_norm": 2.1428979507569488, "learning_rate": 5.1525023042876245e-06, "loss": 0.1521, "step": 5556 }, { "epoch": 1.52, "grad_norm": 1.7640243699101763, "learning_rate": 5.151029302546612e-06, "loss": 0.0998, "step": 5557 }, { "epoch": 1.52, "grad_norm": 1.7058676701755744, "learning_rate": 5.149556287685775e-06, "loss": 0.1114, "step": 5558 }, { "epoch": 1.52, "grad_norm": 1.749880608814022, "learning_rate": 5.148083259833073e-06, "loss": 0.1247, "step": 5559 }, { "epoch": 1.52, "grad_norm": 2.042925597188746, "learning_rate": 5.146610219116467e-06, "loss": 0.1281, "step": 5560 }, { "epoch": 1.52, "grad_norm": 1.7715506526778562, "learning_rate": 5.145137165663921e-06, "loss": 0.1166, "step": 5561 }, { "epoch": 1.52, "grad_norm": 1.8047194074529922, "learning_rate": 5.143664099603394e-06, "loss": 0.1001, "step": 5562 }, { "epoch": 1.52, "grad_norm": 1.990602952832416, "learning_rate": 5.142191021062854e-06, "loss": 0.1144, "step": 5563 }, { "epoch": 1.52, "grad_norm": 1.7409881419880036, "learning_rate": 5.140717930170267e-06, "loss": 0.1105, "step": 5564 }, { "epoch": 1.52, "grad_norm": 1.721682316889882, "learning_rate": 5.139244827053595e-06, "loss": 0.1088, "step": 5565 }, { "epoch": 1.52, "grad_norm": 1.8850251993259846, "learning_rate": 5.137771711840811e-06, "loss": 0.1366, "step": 5566 }, { "epoch": 1.52, "grad_norm": 1.8311969043490457, "learning_rate": 5.13629858465988e-06, "loss": 0.105, "step": 5567 }, { "epoch": 1.52, "grad_norm": 1.8250626920858655, "learning_rate": 5.134825445638772e-06, "loss": 0.1217, "step": 5568 }, { "epoch": 1.52, "grad_norm": 1.9165437287039446, "learning_rate": 5.133352294905461e-06, "loss": 0.1368, "step": 5569 }, { "epoch": 1.52, "grad_norm": 1.648731143092894, "learning_rate": 5.131879132587915e-06, "loss": 0.1062, "step": 5570 }, { "epoch": 1.52, "grad_norm": 2.305510525623916, "learning_rate": 5.130405958814108e-06, "loss": 0.1332, "step": 5571 }, { "epoch": 1.52, "grad_norm": 1.9158620855468196, "learning_rate": 5.1289327737120145e-06, "loss": 0.1111, "step": 5572 }, { "epoch": 1.52, "grad_norm": 1.6979154737301312, "learning_rate": 5.1274595774096055e-06, "loss": 0.1129, "step": 5573 }, { "epoch": 1.52, "grad_norm": 1.861734382316711, "learning_rate": 5.125986370034862e-06, "loss": 0.1244, "step": 5574 }, { "epoch": 1.52, "grad_norm": 1.7052257697986306, "learning_rate": 5.124513151715759e-06, "loss": 0.0949, "step": 5575 }, { "epoch": 1.52, "grad_norm": 1.9022504903614739, "learning_rate": 5.1230399225802715e-06, "loss": 0.1116, "step": 5576 }, { "epoch": 1.52, "grad_norm": 1.9294389115024777, "learning_rate": 5.12156668275638e-06, "loss": 0.1343, "step": 5577 }, { "epoch": 1.52, "grad_norm": 1.7962084987355424, "learning_rate": 5.120093432372065e-06, "loss": 0.114, "step": 5578 }, { "epoch": 1.52, "grad_norm": 1.9131206564039642, "learning_rate": 5.1186201715553055e-06, "loss": 0.129, "step": 5579 }, { "epoch": 1.52, "grad_norm": 1.9541140275212818, "learning_rate": 5.117146900434082e-06, "loss": 0.1254, "step": 5580 }, { "epoch": 1.52, "grad_norm": 2.041725234566848, "learning_rate": 5.115673619136378e-06, "loss": 0.1228, "step": 5581 }, { "epoch": 1.52, "grad_norm": 1.7287929040843057, "learning_rate": 5.114200327790178e-06, "loss": 0.1088, "step": 5582 }, { "epoch": 1.52, "grad_norm": 2.0976604980732034, "learning_rate": 5.112727026523461e-06, "loss": 0.1357, "step": 5583 }, { "epoch": 1.52, "grad_norm": 1.8011316653008067, "learning_rate": 5.111253715464217e-06, "loss": 0.1092, "step": 5584 }, { "epoch": 1.52, "grad_norm": 1.7089208745209623, "learning_rate": 5.109780394740429e-06, "loss": 0.1195, "step": 5585 }, { "epoch": 1.52, "grad_norm": 1.8850089193004487, "learning_rate": 5.108307064480084e-06, "loss": 0.1204, "step": 5586 }, { "epoch": 1.53, "grad_norm": 1.7199709097192781, "learning_rate": 5.10683372481117e-06, "loss": 0.1048, "step": 5587 }, { "epoch": 1.53, "grad_norm": 1.684038680390065, "learning_rate": 5.105360375861673e-06, "loss": 0.1071, "step": 5588 }, { "epoch": 1.53, "grad_norm": 1.884816659817741, "learning_rate": 5.103887017759585e-06, "loss": 0.1186, "step": 5589 }, { "epoch": 1.53, "grad_norm": 1.8049175103913289, "learning_rate": 5.1024136506328935e-06, "loss": 0.1114, "step": 5590 }, { "epoch": 1.53, "grad_norm": 1.6393052797805543, "learning_rate": 5.10094027460959e-06, "loss": 0.1139, "step": 5591 }, { "epoch": 1.53, "grad_norm": 1.8008624285750674, "learning_rate": 5.099466889817664e-06, "loss": 0.1269, "step": 5592 }, { "epoch": 1.53, "grad_norm": 1.9565035504860313, "learning_rate": 5.097993496385112e-06, "loss": 0.1121, "step": 5593 }, { "epoch": 1.53, "grad_norm": 1.668896305008462, "learning_rate": 5.0965200944399215e-06, "loss": 0.1079, "step": 5594 }, { "epoch": 1.53, "grad_norm": 1.7751093056103935, "learning_rate": 5.09504668411009e-06, "loss": 0.1165, "step": 5595 }, { "epoch": 1.53, "grad_norm": 1.7781712490982982, "learning_rate": 5.093573265523609e-06, "loss": 0.1118, "step": 5596 }, { "epoch": 1.53, "grad_norm": 1.7516702649679112, "learning_rate": 5.0920998388084755e-06, "loss": 0.1124, "step": 5597 }, { "epoch": 1.53, "grad_norm": 1.674652659612386, "learning_rate": 5.090626404092682e-06, "loss": 0.1153, "step": 5598 }, { "epoch": 1.53, "grad_norm": 1.6724900799610873, "learning_rate": 5.0891529615042305e-06, "loss": 0.1014, "step": 5599 }, { "epoch": 1.53, "grad_norm": 1.9206051067678724, "learning_rate": 5.087679511171113e-06, "loss": 0.116, "step": 5600 }, { "epoch": 1.53, "grad_norm": 1.8036062937861914, "learning_rate": 5.086206053221328e-06, "loss": 0.1132, "step": 5601 }, { "epoch": 1.53, "grad_norm": 2.2424271425643707, "learning_rate": 5.084732587782878e-06, "loss": 0.1182, "step": 5602 }, { "epoch": 1.53, "grad_norm": 1.8681286876223646, "learning_rate": 5.083259114983757e-06, "loss": 0.1114, "step": 5603 }, { "epoch": 1.53, "grad_norm": 1.8417970612281476, "learning_rate": 5.081785634951967e-06, "loss": 0.0925, "step": 5604 }, { "epoch": 1.53, "grad_norm": 1.8652003201407068, "learning_rate": 5.0803121478155085e-06, "loss": 0.122, "step": 5605 }, { "epoch": 1.53, "grad_norm": 1.8224579755735701, "learning_rate": 5.078838653702381e-06, "loss": 0.1248, "step": 5606 }, { "epoch": 1.53, "grad_norm": 1.8192520281293496, "learning_rate": 5.077365152740587e-06, "loss": 0.1112, "step": 5607 }, { "epoch": 1.53, "grad_norm": 1.925189457085331, "learning_rate": 5.075891645058129e-06, "loss": 0.1218, "step": 5608 }, { "epoch": 1.53, "grad_norm": 1.714661287225587, "learning_rate": 5.0744181307830095e-06, "loss": 0.0987, "step": 5609 }, { "epoch": 1.53, "grad_norm": 1.9243594989547417, "learning_rate": 5.0729446100432326e-06, "loss": 0.1049, "step": 5610 }, { "epoch": 1.53, "grad_norm": 2.092910354194046, "learning_rate": 5.0714710829668004e-06, "loss": 0.128, "step": 5611 }, { "epoch": 1.53, "grad_norm": 1.8465912217009812, "learning_rate": 5.069997549681718e-06, "loss": 0.1322, "step": 5612 }, { "epoch": 1.53, "grad_norm": 1.5969199700198156, "learning_rate": 5.068524010315989e-06, "loss": 0.1087, "step": 5613 }, { "epoch": 1.53, "grad_norm": 1.7168261258946884, "learning_rate": 5.067050464997624e-06, "loss": 0.0953, "step": 5614 }, { "epoch": 1.53, "grad_norm": 2.074251717778969, "learning_rate": 5.065576913854623e-06, "loss": 0.1432, "step": 5615 }, { "epoch": 1.53, "grad_norm": 1.7528318966637877, "learning_rate": 5.064103357014995e-06, "loss": 0.1128, "step": 5616 }, { "epoch": 1.53, "grad_norm": 1.813077194357431, "learning_rate": 5.062629794606748e-06, "loss": 0.1208, "step": 5617 }, { "epoch": 1.53, "grad_norm": 1.8105683627264062, "learning_rate": 5.061156226757887e-06, "loss": 0.1159, "step": 5618 }, { "epoch": 1.53, "grad_norm": 2.0692919953294555, "learning_rate": 5.059682653596422e-06, "loss": 0.1335, "step": 5619 }, { "epoch": 1.53, "grad_norm": 1.7553729412505454, "learning_rate": 5.058209075250361e-06, "loss": 0.1147, "step": 5620 }, { "epoch": 1.53, "grad_norm": 2.047342904492541, "learning_rate": 5.056735491847712e-06, "loss": 0.1465, "step": 5621 }, { "epoch": 1.53, "grad_norm": 1.708023377980643, "learning_rate": 5.055261903516485e-06, "loss": 0.1004, "step": 5622 }, { "epoch": 1.54, "grad_norm": 1.7425084013139187, "learning_rate": 5.053788310384691e-06, "loss": 0.102, "step": 5623 }, { "epoch": 1.54, "grad_norm": 1.8104132235779342, "learning_rate": 5.052314712580336e-06, "loss": 0.1255, "step": 5624 }, { "epoch": 1.54, "grad_norm": 1.6862834881513558, "learning_rate": 5.050841110231435e-06, "loss": 0.0885, "step": 5625 }, { "epoch": 1.54, "grad_norm": 1.590692119395041, "learning_rate": 5.049367503465998e-06, "loss": 0.0884, "step": 5626 }, { "epoch": 1.54, "grad_norm": 1.92226024012906, "learning_rate": 5.047893892412035e-06, "loss": 0.1331, "step": 5627 }, { "epoch": 1.54, "grad_norm": 1.835177765250274, "learning_rate": 5.046420277197558e-06, "loss": 0.1014, "step": 5628 }, { "epoch": 1.54, "grad_norm": 3.4299238542388544, "learning_rate": 5.04494665795058e-06, "loss": 0.13, "step": 5629 }, { "epoch": 1.54, "grad_norm": 1.6232884903013816, "learning_rate": 5.043473034799112e-06, "loss": 0.1099, "step": 5630 }, { "epoch": 1.54, "grad_norm": 1.7168779115045096, "learning_rate": 5.041999407871168e-06, "loss": 0.1009, "step": 5631 }, { "epoch": 1.54, "grad_norm": 1.9260979250250478, "learning_rate": 5.040525777294762e-06, "loss": 0.1328, "step": 5632 }, { "epoch": 1.54, "grad_norm": 1.6021866287067223, "learning_rate": 5.039052143197904e-06, "loss": 0.1021, "step": 5633 }, { "epoch": 1.54, "grad_norm": 1.6973376066730286, "learning_rate": 5.03757850570861e-06, "loss": 0.1023, "step": 5634 }, { "epoch": 1.54, "grad_norm": 1.8539831428433575, "learning_rate": 5.036104864954895e-06, "loss": 0.1075, "step": 5635 }, { "epoch": 1.54, "grad_norm": 1.7903668627281948, "learning_rate": 5.034631221064771e-06, "loss": 0.1131, "step": 5636 }, { "epoch": 1.54, "grad_norm": 1.861225324590869, "learning_rate": 5.033157574166254e-06, "loss": 0.0891, "step": 5637 }, { "epoch": 1.54, "grad_norm": 1.8802712174841574, "learning_rate": 5.031683924387359e-06, "loss": 0.1227, "step": 5638 }, { "epoch": 1.54, "grad_norm": 1.9809688520309148, "learning_rate": 5.0302102718561e-06, "loss": 0.1139, "step": 5639 }, { "epoch": 1.54, "grad_norm": 1.8584979550167655, "learning_rate": 5.0287366167004925e-06, "loss": 0.1073, "step": 5640 }, { "epoch": 1.54, "grad_norm": 2.296553622552679, "learning_rate": 5.027262959048554e-06, "loss": 0.1104, "step": 5641 }, { "epoch": 1.54, "grad_norm": 2.291486010804984, "learning_rate": 5.0257892990282965e-06, "loss": 0.1532, "step": 5642 }, { "epoch": 1.54, "grad_norm": 2.0904098542175036, "learning_rate": 5.024315636767738e-06, "loss": 0.141, "step": 5643 }, { "epoch": 1.54, "grad_norm": 1.8363973076198659, "learning_rate": 5.0228419723948976e-06, "loss": 0.125, "step": 5644 }, { "epoch": 1.54, "grad_norm": 2.033014575605109, "learning_rate": 5.021368306037786e-06, "loss": 0.1297, "step": 5645 }, { "epoch": 1.54, "grad_norm": 2.0625003428347393, "learning_rate": 5.019894637824423e-06, "loss": 0.1065, "step": 5646 }, { "epoch": 1.54, "grad_norm": 1.9558092050965044, "learning_rate": 5.0184209678828265e-06, "loss": 0.1515, "step": 5647 }, { "epoch": 1.54, "grad_norm": 1.6733726194144343, "learning_rate": 5.016947296341009e-06, "loss": 0.0969, "step": 5648 }, { "epoch": 1.54, "grad_norm": 2.01385127510367, "learning_rate": 5.015473623326992e-06, "loss": 0.1108, "step": 5649 }, { "epoch": 1.54, "grad_norm": 2.068321031230667, "learning_rate": 5.01399994896879e-06, "loss": 0.1473, "step": 5650 }, { "epoch": 1.54, "grad_norm": 1.7548321598878058, "learning_rate": 5.01252627339442e-06, "loss": 0.1107, "step": 5651 }, { "epoch": 1.54, "grad_norm": 1.8857505464503226, "learning_rate": 5.0110525967319014e-06, "loss": 0.1213, "step": 5652 }, { "epoch": 1.54, "grad_norm": 1.730373025987402, "learning_rate": 5.00957891910925e-06, "loss": 0.1174, "step": 5653 }, { "epoch": 1.54, "grad_norm": 1.6724570413217763, "learning_rate": 5.008105240654484e-06, "loss": 0.0997, "step": 5654 }, { "epoch": 1.54, "grad_norm": 1.6378072926054223, "learning_rate": 5.006631561495619e-06, "loss": 0.0948, "step": 5655 }, { "epoch": 1.54, "grad_norm": 1.7758720818611609, "learning_rate": 5.005157881760676e-06, "loss": 0.0964, "step": 5656 }, { "epoch": 1.54, "grad_norm": 1.7962637614290708, "learning_rate": 5.003684201577671e-06, "loss": 0.1186, "step": 5657 }, { "epoch": 1.54, "grad_norm": 2.2655690768766648, "learning_rate": 5.00221052107462e-06, "loss": 0.1393, "step": 5658 }, { "epoch": 1.54, "grad_norm": 1.7777998570584177, "learning_rate": 5.0007368403795445e-06, "loss": 0.1143, "step": 5659 }, { "epoch": 1.55, "grad_norm": 1.7558897600856012, "learning_rate": 4.999263159620457e-06, "loss": 0.1045, "step": 5660 }, { "epoch": 1.55, "grad_norm": 2.031210830016424, "learning_rate": 4.997789478925381e-06, "loss": 0.1235, "step": 5661 }, { "epoch": 1.55, "grad_norm": 1.934091233045082, "learning_rate": 4.996315798422331e-06, "loss": 0.1295, "step": 5662 }, { "epoch": 1.55, "grad_norm": 1.7280359998980814, "learning_rate": 4.9948421182393255e-06, "loss": 0.1068, "step": 5663 }, { "epoch": 1.55, "grad_norm": 1.8785839450477815, "learning_rate": 4.993368438504381e-06, "loss": 0.1148, "step": 5664 }, { "epoch": 1.55, "grad_norm": 1.9720584283676688, "learning_rate": 4.991894759345519e-06, "loss": 0.1236, "step": 5665 }, { "epoch": 1.55, "grad_norm": 2.0121207616333905, "learning_rate": 4.990421080890751e-06, "loss": 0.1233, "step": 5666 }, { "epoch": 1.55, "grad_norm": 2.1782927040738307, "learning_rate": 4.9889474032681e-06, "loss": 0.138, "step": 5667 }, { "epoch": 1.55, "grad_norm": 1.8129014974143547, "learning_rate": 4.987473726605581e-06, "loss": 0.1096, "step": 5668 }, { "epoch": 1.55, "grad_norm": 1.803896452146066, "learning_rate": 4.986000051031212e-06, "loss": 0.118, "step": 5669 }, { "epoch": 1.55, "grad_norm": 2.067597732413137, "learning_rate": 4.98452637667301e-06, "loss": 0.152, "step": 5670 }, { "epoch": 1.55, "grad_norm": 1.9111122051385818, "learning_rate": 4.983052703658993e-06, "loss": 0.1319, "step": 5671 }, { "epoch": 1.55, "grad_norm": 1.9427129436199546, "learning_rate": 4.981579032117175e-06, "loss": 0.1187, "step": 5672 }, { "epoch": 1.55, "grad_norm": 1.658142341628866, "learning_rate": 4.980105362175579e-06, "loss": 0.1097, "step": 5673 }, { "epoch": 1.55, "grad_norm": 1.6511248088283692, "learning_rate": 4.978631693962216e-06, "loss": 0.0994, "step": 5674 }, { "epoch": 1.55, "grad_norm": 1.6356809147118867, "learning_rate": 4.977158027605105e-06, "loss": 0.0946, "step": 5675 }, { "epoch": 1.55, "grad_norm": 1.9975584833690718, "learning_rate": 4.975684363232263e-06, "loss": 0.1248, "step": 5676 }, { "epoch": 1.55, "grad_norm": 1.8369968635811522, "learning_rate": 4.974210700971706e-06, "loss": 0.1246, "step": 5677 }, { "epoch": 1.55, "grad_norm": 1.8593771112850501, "learning_rate": 4.972737040951448e-06, "loss": 0.1309, "step": 5678 }, { "epoch": 1.55, "grad_norm": 1.747340510580516, "learning_rate": 4.971263383299509e-06, "loss": 0.0978, "step": 5679 }, { "epoch": 1.55, "grad_norm": 1.6215517704741043, "learning_rate": 4.969789728143902e-06, "loss": 0.0942, "step": 5680 }, { "epoch": 1.55, "grad_norm": 1.75672850115425, "learning_rate": 4.968316075612643e-06, "loss": 0.102, "step": 5681 }, { "epoch": 1.55, "grad_norm": 1.9669161735713907, "learning_rate": 4.966842425833748e-06, "loss": 0.1299, "step": 5682 }, { "epoch": 1.55, "grad_norm": 1.901679111324784, "learning_rate": 4.965368778935231e-06, "loss": 0.1469, "step": 5683 }, { "epoch": 1.55, "grad_norm": 1.7768482313776948, "learning_rate": 4.963895135045106e-06, "loss": 0.1036, "step": 5684 }, { "epoch": 1.55, "grad_norm": 1.6277421141176784, "learning_rate": 4.9624214942913916e-06, "loss": 0.1008, "step": 5685 }, { "epoch": 1.55, "grad_norm": 1.8337856441661124, "learning_rate": 4.960947856802097e-06, "loss": 0.1192, "step": 5686 }, { "epoch": 1.55, "grad_norm": 1.780159456757161, "learning_rate": 4.959474222705241e-06, "loss": 0.1044, "step": 5687 }, { "epoch": 1.55, "grad_norm": 1.6007562662347046, "learning_rate": 4.958000592128834e-06, "loss": 0.0974, "step": 5688 }, { "epoch": 1.55, "grad_norm": 1.6694203916932255, "learning_rate": 4.956526965200891e-06, "loss": 0.1044, "step": 5689 }, { "epoch": 1.55, "grad_norm": 1.653217059591154, "learning_rate": 4.9550533420494216e-06, "loss": 0.0854, "step": 5690 }, { "epoch": 1.55, "grad_norm": 1.8819241939426945, "learning_rate": 4.953579722802444e-06, "loss": 0.1137, "step": 5691 }, { "epoch": 1.55, "grad_norm": 1.8651468200409407, "learning_rate": 4.952106107587967e-06, "loss": 0.1148, "step": 5692 }, { "epoch": 1.55, "grad_norm": 1.7526053626772513, "learning_rate": 4.950632496534004e-06, "loss": 0.0919, "step": 5693 }, { "epoch": 1.55, "grad_norm": 1.8310157198213042, "learning_rate": 4.949158889768566e-06, "loss": 0.106, "step": 5694 }, { "epoch": 1.55, "grad_norm": 2.1219930115365644, "learning_rate": 4.9476852874196665e-06, "loss": 0.1125, "step": 5695 }, { "epoch": 1.56, "grad_norm": 1.837587959049806, "learning_rate": 4.9462116896153115e-06, "loss": 0.0934, "step": 5696 }, { "epoch": 1.56, "grad_norm": 1.9998489226606917, "learning_rate": 4.9447380964835165e-06, "loss": 0.1238, "step": 5697 }, { "epoch": 1.56, "grad_norm": 2.3671861825266167, "learning_rate": 4.94326450815229e-06, "loss": 0.1389, "step": 5698 }, { "epoch": 1.56, "grad_norm": 2.1497990502985274, "learning_rate": 4.9417909247496415e-06, "loss": 0.1142, "step": 5699 }, { "epoch": 1.56, "grad_norm": 2.033372234976445, "learning_rate": 4.94031734640358e-06, "loss": 0.1321, "step": 5700 }, { "epoch": 1.56, "grad_norm": 1.7093406378908598, "learning_rate": 4.938843773242115e-06, "loss": 0.101, "step": 5701 }, { "epoch": 1.56, "grad_norm": 1.9523284450374463, "learning_rate": 4.9373702053932534e-06, "loss": 0.1178, "step": 5702 }, { "epoch": 1.56, "grad_norm": 2.0611238166998405, "learning_rate": 4.935896642985006e-06, "loss": 0.1358, "step": 5703 }, { "epoch": 1.56, "grad_norm": 1.8091374609670188, "learning_rate": 4.934423086145379e-06, "loss": 0.1021, "step": 5704 }, { "epoch": 1.56, "grad_norm": 1.8315061251008664, "learning_rate": 4.932949535002379e-06, "loss": 0.1307, "step": 5705 }, { "epoch": 1.56, "grad_norm": 1.5752788518998808, "learning_rate": 4.9314759896840115e-06, "loss": 0.0932, "step": 5706 }, { "epoch": 1.56, "grad_norm": 1.8289836160182324, "learning_rate": 4.930002450318282e-06, "loss": 0.124, "step": 5707 }, { "epoch": 1.56, "grad_norm": 1.9310853166360196, "learning_rate": 4.928528917033201e-06, "loss": 0.1062, "step": 5708 }, { "epoch": 1.56, "grad_norm": 1.818647996449721, "learning_rate": 4.927055389956768e-06, "loss": 0.1291, "step": 5709 }, { "epoch": 1.56, "grad_norm": 1.9569161687472154, "learning_rate": 4.925581869216991e-06, "loss": 0.1173, "step": 5710 }, { "epoch": 1.56, "grad_norm": 1.667077688058881, "learning_rate": 4.9241083549418714e-06, "loss": 0.0874, "step": 5711 }, { "epoch": 1.56, "grad_norm": 1.762875435620747, "learning_rate": 4.922634847259415e-06, "loss": 0.1176, "step": 5712 }, { "epoch": 1.56, "grad_norm": 1.903497213373195, "learning_rate": 4.92116134629762e-06, "loss": 0.1141, "step": 5713 }, { "epoch": 1.56, "grad_norm": 1.8133803376171806, "learning_rate": 4.919687852184493e-06, "loss": 0.1169, "step": 5714 }, { "epoch": 1.56, "grad_norm": 1.8570255570873222, "learning_rate": 4.918214365048034e-06, "loss": 0.1258, "step": 5715 }, { "epoch": 1.56, "grad_norm": 1.9009246373204074, "learning_rate": 4.916740885016244e-06, "loss": 0.1163, "step": 5716 }, { "epoch": 1.56, "grad_norm": 1.6720574676633757, "learning_rate": 4.9152674122171235e-06, "loss": 0.0971, "step": 5717 }, { "epoch": 1.56, "grad_norm": 1.8093878511718644, "learning_rate": 4.9137939467786724e-06, "loss": 0.1169, "step": 5718 }, { "epoch": 1.56, "grad_norm": 1.7209826144512443, "learning_rate": 4.912320488828887e-06, "loss": 0.1037, "step": 5719 }, { "epoch": 1.56, "grad_norm": 1.609029004903627, "learning_rate": 4.910847038495771e-06, "loss": 0.1028, "step": 5720 }, { "epoch": 1.56, "grad_norm": 1.9421893024123071, "learning_rate": 4.909373595907317e-06, "loss": 0.1122, "step": 5721 }, { "epoch": 1.56, "grad_norm": 1.7453372901433868, "learning_rate": 4.907900161191527e-06, "loss": 0.1199, "step": 5722 }, { "epoch": 1.56, "grad_norm": 1.7006496328797378, "learning_rate": 4.9064267344763924e-06, "loss": 0.1082, "step": 5723 }, { "epoch": 1.56, "grad_norm": 1.7374672099068977, "learning_rate": 4.904953315889912e-06, "loss": 0.1127, "step": 5724 }, { "epoch": 1.56, "grad_norm": 1.828307663872893, "learning_rate": 4.9034799055600785e-06, "loss": 0.117, "step": 5725 }, { "epoch": 1.56, "grad_norm": 1.7282872858421505, "learning_rate": 4.9020065036148885e-06, "loss": 0.1232, "step": 5726 }, { "epoch": 1.56, "grad_norm": 1.8154254284960079, "learning_rate": 4.900533110182335e-06, "loss": 0.1162, "step": 5727 }, { "epoch": 1.56, "grad_norm": 1.7500849027564973, "learning_rate": 4.899059725390412e-06, "loss": 0.1127, "step": 5728 }, { "epoch": 1.56, "grad_norm": 1.9075675736820878, "learning_rate": 4.897586349367107e-06, "loss": 0.0996, "step": 5729 }, { "epoch": 1.56, "grad_norm": 1.9859964943920791, "learning_rate": 4.896112982240417e-06, "loss": 0.133, "step": 5730 }, { "epoch": 1.56, "grad_norm": 1.684623806492088, "learning_rate": 4.894639624138327e-06, "loss": 0.112, "step": 5731 }, { "epoch": 1.56, "grad_norm": 1.7481052846918421, "learning_rate": 4.893166275188831e-06, "loss": 0.0996, "step": 5732 }, { "epoch": 1.57, "grad_norm": 1.8416961533704146, "learning_rate": 4.891692935519917e-06, "loss": 0.12, "step": 5733 }, { "epoch": 1.57, "grad_norm": 1.7881593786151584, "learning_rate": 4.8902196052595725e-06, "loss": 0.1069, "step": 5734 }, { "epoch": 1.57, "grad_norm": 1.9102205765429747, "learning_rate": 4.888746284535784e-06, "loss": 0.1134, "step": 5735 }, { "epoch": 1.57, "grad_norm": 1.6806145859210895, "learning_rate": 4.88727297347654e-06, "loss": 0.1062, "step": 5736 }, { "epoch": 1.57, "grad_norm": 1.5584932563044123, "learning_rate": 4.885799672209823e-06, "loss": 0.0968, "step": 5737 }, { "epoch": 1.57, "grad_norm": 1.7434458052038524, "learning_rate": 4.8843263808636225e-06, "loss": 0.122, "step": 5738 }, { "epoch": 1.57, "grad_norm": 1.7821129088055114, "learning_rate": 4.8828530995659185e-06, "loss": 0.109, "step": 5739 }, { "epoch": 1.57, "grad_norm": 1.9620004057843237, "learning_rate": 4.881379828444696e-06, "loss": 0.1327, "step": 5740 }, { "epoch": 1.57, "grad_norm": 1.7635306355476654, "learning_rate": 4.8799065676279354e-06, "loss": 0.1267, "step": 5741 }, { "epoch": 1.57, "grad_norm": 1.7682174779191964, "learning_rate": 4.878433317243621e-06, "loss": 0.1241, "step": 5742 }, { "epoch": 1.57, "grad_norm": 1.713173004251655, "learning_rate": 4.8769600774197285e-06, "loss": 0.1133, "step": 5743 }, { "epoch": 1.57, "grad_norm": 1.753934743386582, "learning_rate": 4.875486848284243e-06, "loss": 0.0962, "step": 5744 }, { "epoch": 1.57, "grad_norm": 1.694013358194294, "learning_rate": 4.874013629965138e-06, "loss": 0.0966, "step": 5745 }, { "epoch": 1.57, "grad_norm": 1.6081367182112725, "learning_rate": 4.872540422590395e-06, "loss": 0.1016, "step": 5746 }, { "epoch": 1.57, "grad_norm": 1.6497513420626597, "learning_rate": 4.871067226287988e-06, "loss": 0.1063, "step": 5747 }, { "epoch": 1.57, "grad_norm": 1.9007595261958676, "learning_rate": 4.869594041185895e-06, "loss": 0.1314, "step": 5748 }, { "epoch": 1.57, "grad_norm": 1.8466426882191314, "learning_rate": 4.868120867412085e-06, "loss": 0.1069, "step": 5749 }, { "epoch": 1.57, "grad_norm": 1.8612378106663066, "learning_rate": 4.866647705094541e-06, "loss": 0.1216, "step": 5750 }, { "epoch": 1.57, "grad_norm": 1.8405144033828842, "learning_rate": 4.865174554361228e-06, "loss": 0.1044, "step": 5751 }, { "epoch": 1.57, "grad_norm": 1.6995991271554405, "learning_rate": 4.863701415340122e-06, "loss": 0.0995, "step": 5752 }, { "epoch": 1.57, "grad_norm": 1.7552008637991825, "learning_rate": 4.862228288159191e-06, "loss": 0.1108, "step": 5753 }, { "epoch": 1.57, "grad_norm": 1.6071088609528863, "learning_rate": 4.8607551729464066e-06, "loss": 0.1033, "step": 5754 }, { "epoch": 1.57, "grad_norm": 1.8477585095269573, "learning_rate": 4.859282069829735e-06, "loss": 0.1084, "step": 5755 }, { "epoch": 1.57, "grad_norm": 1.8963890238903187, "learning_rate": 4.8578089789371476e-06, "loss": 0.127, "step": 5756 }, { "epoch": 1.57, "grad_norm": 1.6656099080347362, "learning_rate": 4.856335900396607e-06, "loss": 0.109, "step": 5757 }, { "epoch": 1.57, "grad_norm": 1.7733985607823848, "learning_rate": 4.854862834336082e-06, "loss": 0.1092, "step": 5758 }, { "epoch": 1.57, "grad_norm": 1.8727722672515896, "learning_rate": 4.853389780883535e-06, "loss": 0.1228, "step": 5759 }, { "epoch": 1.57, "grad_norm": 1.8176649994104128, "learning_rate": 4.85191674016693e-06, "loss": 0.133, "step": 5760 }, { "epoch": 1.57, "grad_norm": 1.798999066859521, "learning_rate": 4.850443712314226e-06, "loss": 0.1235, "step": 5761 }, { "epoch": 1.57, "grad_norm": 1.8658596334148467, "learning_rate": 4.84897069745339e-06, "loss": 0.1091, "step": 5762 }, { "epoch": 1.57, "grad_norm": 1.634225858589599, "learning_rate": 4.847497695712378e-06, "loss": 0.1003, "step": 5763 }, { "epoch": 1.57, "grad_norm": 1.5401341581020818, "learning_rate": 4.846024707219149e-06, "loss": 0.1069, "step": 5764 }, { "epoch": 1.57, "grad_norm": 1.6183584850698047, "learning_rate": 4.844551732101662e-06, "loss": 0.0941, "step": 5765 }, { "epoch": 1.57, "grad_norm": 1.6543020518878626, "learning_rate": 4.8430787704878725e-06, "loss": 0.1039, "step": 5766 }, { "epoch": 1.57, "grad_norm": 2.057836347452287, "learning_rate": 4.841605822505734e-06, "loss": 0.1348, "step": 5767 }, { "epoch": 1.57, "grad_norm": 1.7612813463632422, "learning_rate": 4.840132888283205e-06, "loss": 0.1073, "step": 5768 }, { "epoch": 1.57, "grad_norm": 1.7488480715678698, "learning_rate": 4.838659967948234e-06, "loss": 0.1007, "step": 5769 }, { "epoch": 1.58, "grad_norm": 1.949305034389504, "learning_rate": 4.837187061628777e-06, "loss": 0.1195, "step": 5770 }, { "epoch": 1.58, "grad_norm": 1.8768754169423492, "learning_rate": 4.835714169452781e-06, "loss": 0.0951, "step": 5771 }, { "epoch": 1.58, "grad_norm": 1.6872155994730111, "learning_rate": 4.8342412915481965e-06, "loss": 0.1053, "step": 5772 }, { "epoch": 1.58, "grad_norm": 1.757100945986478, "learning_rate": 4.832768428042969e-06, "loss": 0.1136, "step": 5773 }, { "epoch": 1.58, "grad_norm": 1.8266618111570159, "learning_rate": 4.83129557906505e-06, "loss": 0.1006, "step": 5774 }, { "epoch": 1.58, "grad_norm": 1.8800910902856227, "learning_rate": 4.829822744742383e-06, "loss": 0.1024, "step": 5775 }, { "epoch": 1.58, "grad_norm": 1.7789115076420592, "learning_rate": 4.828349925202912e-06, "loss": 0.1152, "step": 5776 }, { "epoch": 1.58, "grad_norm": 1.64202071672074, "learning_rate": 4.826877120574579e-06, "loss": 0.1136, "step": 5777 }, { "epoch": 1.58, "grad_norm": 2.0242727417068327, "learning_rate": 4.825404330985328e-06, "loss": 0.1367, "step": 5778 }, { "epoch": 1.58, "grad_norm": 1.7802500752376826, "learning_rate": 4.823931556563094e-06, "loss": 0.1041, "step": 5779 }, { "epoch": 1.58, "grad_norm": 2.0323776150631527, "learning_rate": 4.822458797435824e-06, "loss": 0.1312, "step": 5780 }, { "epoch": 1.58, "grad_norm": 1.8693817759850502, "learning_rate": 4.8209860537314504e-06, "loss": 0.1168, "step": 5781 }, { "epoch": 1.58, "grad_norm": 1.9952595945622615, "learning_rate": 4.819513325577911e-06, "loss": 0.119, "step": 5782 }, { "epoch": 1.58, "grad_norm": 1.7791461906356476, "learning_rate": 4.818040613103139e-06, "loss": 0.1128, "step": 5783 }, { "epoch": 1.58, "grad_norm": 1.9572449691796707, "learning_rate": 4.816567916435072e-06, "loss": 0.1208, "step": 5784 }, { "epoch": 1.58, "grad_norm": 2.0010546884517972, "learning_rate": 4.815095235701637e-06, "loss": 0.1228, "step": 5785 }, { "epoch": 1.58, "grad_norm": 1.9309152984072147, "learning_rate": 4.81362257103077e-06, "loss": 0.1345, "step": 5786 }, { "epoch": 1.58, "grad_norm": 1.8454420843006936, "learning_rate": 4.8121499225503974e-06, "loss": 0.0998, "step": 5787 }, { "epoch": 1.58, "grad_norm": 1.778736418162696, "learning_rate": 4.810677290388449e-06, "loss": 0.1199, "step": 5788 }, { "epoch": 1.58, "grad_norm": 1.6879138017801798, "learning_rate": 4.80920467467285e-06, "loss": 0.0982, "step": 5789 }, { "epoch": 1.58, "grad_norm": 1.7918692659552269, "learning_rate": 4.807732075531527e-06, "loss": 0.1271, "step": 5790 }, { "epoch": 1.58, "grad_norm": 1.7341349136829614, "learning_rate": 4.8062594930924015e-06, "loss": 0.1221, "step": 5791 }, { "epoch": 1.58, "grad_norm": 1.60675201915511, "learning_rate": 4.804786927483399e-06, "loss": 0.1066, "step": 5792 }, { "epoch": 1.58, "grad_norm": 1.7868072677455564, "learning_rate": 4.803314378832437e-06, "loss": 0.1107, "step": 5793 }, { "epoch": 1.58, "grad_norm": 1.9284059809003662, "learning_rate": 4.801841847267439e-06, "loss": 0.1371, "step": 5794 }, { "epoch": 1.58, "grad_norm": 1.7601085819950688, "learning_rate": 4.800369332916319e-06, "loss": 0.1083, "step": 5795 }, { "epoch": 1.58, "grad_norm": 1.8269185248881448, "learning_rate": 4.7988968359069965e-06, "loss": 0.1242, "step": 5796 }, { "epoch": 1.58, "grad_norm": 1.6697723497919919, "learning_rate": 4.797424356367383e-06, "loss": 0.1108, "step": 5797 }, { "epoch": 1.58, "grad_norm": 1.8184438258718252, "learning_rate": 4.795951894425396e-06, "loss": 0.1209, "step": 5798 }, { "epoch": 1.58, "grad_norm": 1.8167479408872471, "learning_rate": 4.794479450208944e-06, "loss": 0.0974, "step": 5799 }, { "epoch": 1.58, "grad_norm": 1.7001557135695522, "learning_rate": 4.793007023845939e-06, "loss": 0.094, "step": 5800 }, { "epoch": 1.58, "grad_norm": 1.9240326244901658, "learning_rate": 4.79153461546429e-06, "loss": 0.1194, "step": 5801 }, { "epoch": 1.58, "grad_norm": 1.900376832500559, "learning_rate": 4.790062225191902e-06, "loss": 0.1189, "step": 5802 }, { "epoch": 1.58, "grad_norm": 1.8746660758595346, "learning_rate": 4.788589853156685e-06, "loss": 0.1318, "step": 5803 }, { "epoch": 1.58, "grad_norm": 1.5530386367504285, "learning_rate": 4.787117499486539e-06, "loss": 0.0888, "step": 5804 }, { "epoch": 1.58, "grad_norm": 1.9014417800591763, "learning_rate": 4.785645164309368e-06, "loss": 0.1265, "step": 5805 }, { "epoch": 1.59, "grad_norm": 1.8352995595755826, "learning_rate": 4.784172847753073e-06, "loss": 0.1172, "step": 5806 }, { "epoch": 1.59, "grad_norm": 1.7906503847992112, "learning_rate": 4.782700549945554e-06, "loss": 0.1043, "step": 5807 }, { "epoch": 1.59, "grad_norm": 1.6622055077269564, "learning_rate": 4.781228271014704e-06, "loss": 0.0897, "step": 5808 }, { "epoch": 1.59, "grad_norm": 1.9378819097600386, "learning_rate": 4.779756011088427e-06, "loss": 0.123, "step": 5809 }, { "epoch": 1.59, "grad_norm": 1.8986105258992039, "learning_rate": 4.778283770294611e-06, "loss": 0.1349, "step": 5810 }, { "epoch": 1.59, "grad_norm": 1.9373484793514644, "learning_rate": 4.776811548761151e-06, "loss": 0.12, "step": 5811 }, { "epoch": 1.59, "grad_norm": 1.8488450062922406, "learning_rate": 4.775339346615937e-06, "loss": 0.1196, "step": 5812 }, { "epoch": 1.59, "grad_norm": 1.9135308776486715, "learning_rate": 4.773867163986861e-06, "loss": 0.1189, "step": 5813 }, { "epoch": 1.59, "grad_norm": 1.653273620121883, "learning_rate": 4.772395001001805e-06, "loss": 0.1092, "step": 5814 }, { "epoch": 1.59, "grad_norm": 1.8324525133036889, "learning_rate": 4.770922857788662e-06, "loss": 0.106, "step": 5815 }, { "epoch": 1.59, "grad_norm": 1.8954599507751637, "learning_rate": 4.769450734475311e-06, "loss": 0.1129, "step": 5816 }, { "epoch": 1.59, "grad_norm": 1.885755000372393, "learning_rate": 4.767978631189637e-06, "loss": 0.1081, "step": 5817 }, { "epoch": 1.59, "grad_norm": 2.272605380914167, "learning_rate": 4.766506548059519e-06, "loss": 0.1305, "step": 5818 }, { "epoch": 1.59, "grad_norm": 1.687725613498644, "learning_rate": 4.765034485212838e-06, "loss": 0.0974, "step": 5819 }, { "epoch": 1.59, "grad_norm": 2.1352277805682323, "learning_rate": 4.763562442777468e-06, "loss": 0.1372, "step": 5820 }, { "epoch": 1.59, "grad_norm": 1.9097599706014994, "learning_rate": 4.762090420881289e-06, "loss": 0.1128, "step": 5821 }, { "epoch": 1.59, "grad_norm": 1.5309999626312183, "learning_rate": 4.760618419652171e-06, "loss": 0.096, "step": 5822 }, { "epoch": 1.59, "grad_norm": 1.746586246681869, "learning_rate": 4.759146439217988e-06, "loss": 0.0986, "step": 5823 }, { "epoch": 1.59, "grad_norm": 1.5348836153514618, "learning_rate": 4.757674479706608e-06, "loss": 0.1065, "step": 5824 }, { "epoch": 1.59, "grad_norm": 1.8055086328004646, "learning_rate": 4.756202541245901e-06, "loss": 0.1004, "step": 5825 }, { "epoch": 1.59, "grad_norm": 1.7819694254964096, "learning_rate": 4.7547306239637314e-06, "loss": 0.1217, "step": 5826 }, { "epoch": 1.59, "grad_norm": 2.213888238903891, "learning_rate": 4.753258727987967e-06, "loss": 0.142, "step": 5827 }, { "epoch": 1.59, "grad_norm": 1.9114142111373016, "learning_rate": 4.751786853446467e-06, "loss": 0.1298, "step": 5828 }, { "epoch": 1.59, "grad_norm": 2.1320990677164704, "learning_rate": 4.750315000467096e-06, "loss": 0.1249, "step": 5829 }, { "epoch": 1.59, "grad_norm": 1.8533968633744407, "learning_rate": 4.74884316917771e-06, "loss": 0.1153, "step": 5830 }, { "epoch": 1.59, "grad_norm": 1.88040365841761, "learning_rate": 4.747371359706167e-06, "loss": 0.1237, "step": 5831 }, { "epoch": 1.59, "grad_norm": 1.6644013164049285, "learning_rate": 4.74589957218032e-06, "loss": 0.0834, "step": 5832 }, { "epoch": 1.59, "grad_norm": 1.763009551105408, "learning_rate": 4.7444278067280275e-06, "loss": 0.1009, "step": 5833 }, { "epoch": 1.59, "grad_norm": 1.8901838968526794, "learning_rate": 4.742956063477136e-06, "loss": 0.1086, "step": 5834 }, { "epoch": 1.59, "grad_norm": 1.8874826789291914, "learning_rate": 4.741484342555498e-06, "loss": 0.1203, "step": 5835 }, { "epoch": 1.59, "grad_norm": 2.2014894923974317, "learning_rate": 4.7400126440909595e-06, "loss": 0.1168, "step": 5836 }, { "epoch": 1.59, "grad_norm": 2.0727115741286912, "learning_rate": 4.738540968211367e-06, "loss": 0.1119, "step": 5837 }, { "epoch": 1.59, "grad_norm": 1.7702872967757892, "learning_rate": 4.737069315044562e-06, "loss": 0.1127, "step": 5838 }, { "epoch": 1.59, "grad_norm": 1.974011891411579, "learning_rate": 4.735597684718389e-06, "loss": 0.1352, "step": 5839 }, { "epoch": 1.59, "grad_norm": 1.7997201859376395, "learning_rate": 4.734126077360685e-06, "loss": 0.1156, "step": 5840 }, { "epoch": 1.59, "grad_norm": 1.6977115954645179, "learning_rate": 4.7326544930992905e-06, "loss": 0.1208, "step": 5841 }, { "epoch": 1.59, "grad_norm": 1.6120726414039799, "learning_rate": 4.7311829320620384e-06, "loss": 0.0975, "step": 5842 }, { "epoch": 1.6, "grad_norm": 1.9216844667439537, "learning_rate": 4.729711394376765e-06, "loss": 0.1281, "step": 5843 }, { "epoch": 1.6, "grad_norm": 1.5732382219691132, "learning_rate": 4.728239880171298e-06, "loss": 0.0946, "step": 5844 }, { "epoch": 1.6, "grad_norm": 1.5587617194953125, "learning_rate": 4.726768389573471e-06, "loss": 0.1082, "step": 5845 }, { "epoch": 1.6, "grad_norm": 1.807657434962819, "learning_rate": 4.725296922711109e-06, "loss": 0.101, "step": 5846 }, { "epoch": 1.6, "grad_norm": 1.7152001206403256, "learning_rate": 4.723825479712039e-06, "loss": 0.1264, "step": 5847 }, { "epoch": 1.6, "grad_norm": 1.9446942358779278, "learning_rate": 4.722354060704083e-06, "loss": 0.1295, "step": 5848 }, { "epoch": 1.6, "grad_norm": 1.7303020307657297, "learning_rate": 4.720882665815064e-06, "loss": 0.1137, "step": 5849 }, { "epoch": 1.6, "grad_norm": 1.7861039588568814, "learning_rate": 4.719411295172797e-06, "loss": 0.0962, "step": 5850 }, { "epoch": 1.6, "grad_norm": 1.764737303748098, "learning_rate": 4.717939948905106e-06, "loss": 0.1076, "step": 5851 }, { "epoch": 1.6, "grad_norm": 1.9657570521919652, "learning_rate": 4.7164686271398005e-06, "loss": 0.1249, "step": 5852 }, { "epoch": 1.6, "grad_norm": 1.8603412528297958, "learning_rate": 4.714997330004696e-06, "loss": 0.126, "step": 5853 }, { "epoch": 1.6, "grad_norm": 1.9178296228055158, "learning_rate": 4.713526057627601e-06, "loss": 0.1032, "step": 5854 }, { "epoch": 1.6, "grad_norm": 1.7848120414476905, "learning_rate": 4.712054810136327e-06, "loss": 0.1088, "step": 5855 }, { "epoch": 1.6, "grad_norm": 2.227612031919105, "learning_rate": 4.710583587658675e-06, "loss": 0.1602, "step": 5856 }, { "epoch": 1.6, "grad_norm": 1.9106229480430856, "learning_rate": 4.709112390322456e-06, "loss": 0.1187, "step": 5857 }, { "epoch": 1.6, "grad_norm": 1.7438963947927832, "learning_rate": 4.707641218255468e-06, "loss": 0.1088, "step": 5858 }, { "epoch": 1.6, "grad_norm": 1.9839585538762703, "learning_rate": 4.706170071585513e-06, "loss": 0.1248, "step": 5859 }, { "epoch": 1.6, "grad_norm": 1.6212696133177271, "learning_rate": 4.704698950440386e-06, "loss": 0.0981, "step": 5860 }, { "epoch": 1.6, "grad_norm": 1.8134499348071371, "learning_rate": 4.703227854947884e-06, "loss": 0.1167, "step": 5861 }, { "epoch": 1.6, "grad_norm": 1.7978405292640642, "learning_rate": 4.701756785235798e-06, "loss": 0.0957, "step": 5862 }, { "epoch": 1.6, "grad_norm": 1.6951695522737673, "learning_rate": 4.700285741431924e-06, "loss": 0.1036, "step": 5863 }, { "epoch": 1.6, "grad_norm": 1.8436728595925593, "learning_rate": 4.698814723664046e-06, "loss": 0.1159, "step": 5864 }, { "epoch": 1.6, "grad_norm": 1.5854219462219072, "learning_rate": 4.697343732059953e-06, "loss": 0.0985, "step": 5865 }, { "epoch": 1.6, "grad_norm": 1.8573442487280667, "learning_rate": 4.695872766747427e-06, "loss": 0.0983, "step": 5866 }, { "epoch": 1.6, "grad_norm": 1.7640963539350536, "learning_rate": 4.694401827854252e-06, "loss": 0.1161, "step": 5867 }, { "epoch": 1.6, "grad_norm": 1.8738824373646115, "learning_rate": 4.6929309155082045e-06, "loss": 0.1245, "step": 5868 }, { "epoch": 1.6, "grad_norm": 1.8557139389586945, "learning_rate": 4.691460029837066e-06, "loss": 0.1277, "step": 5869 }, { "epoch": 1.6, "grad_norm": 1.5719878205675166, "learning_rate": 4.689989170968609e-06, "loss": 0.1064, "step": 5870 }, { "epoch": 1.6, "grad_norm": 1.9914432579846897, "learning_rate": 4.688518339030607e-06, "loss": 0.1344, "step": 5871 }, { "epoch": 1.6, "grad_norm": 1.5228425687207567, "learning_rate": 4.687047534150829e-06, "loss": 0.0802, "step": 5872 }, { "epoch": 1.6, "grad_norm": 1.8003761930528475, "learning_rate": 4.685576756457044e-06, "loss": 0.1148, "step": 5873 }, { "epoch": 1.6, "grad_norm": 1.650483536474264, "learning_rate": 4.684106006077015e-06, "loss": 0.1021, "step": 5874 }, { "epoch": 1.6, "grad_norm": 1.6761870246654584, "learning_rate": 4.682635283138511e-06, "loss": 0.1089, "step": 5875 }, { "epoch": 1.6, "grad_norm": 1.9133238556564902, "learning_rate": 4.681164587769287e-06, "loss": 0.1065, "step": 5876 }, { "epoch": 1.6, "grad_norm": 1.6507591831526423, "learning_rate": 4.679693920097105e-06, "loss": 0.1014, "step": 5877 }, { "epoch": 1.6, "grad_norm": 1.8866229060608464, "learning_rate": 4.678223280249718e-06, "loss": 0.1058, "step": 5878 }, { "epoch": 1.6, "grad_norm": 1.9760817917867326, "learning_rate": 4.676752668354884e-06, "loss": 0.1156, "step": 5879 }, { "epoch": 1.61, "grad_norm": 1.7839330291960105, "learning_rate": 4.675282084540348e-06, "loss": 0.1127, "step": 5880 }, { "epoch": 1.61, "grad_norm": 1.4124860362722247, "learning_rate": 4.673811528933865e-06, "loss": 0.0838, "step": 5881 }, { "epoch": 1.61, "grad_norm": 1.9095096221208152, "learning_rate": 4.672341001663178e-06, "loss": 0.1168, "step": 5882 }, { "epoch": 1.61, "grad_norm": 1.6278396460492184, "learning_rate": 4.670870502856031e-06, "loss": 0.1091, "step": 5883 }, { "epoch": 1.61, "grad_norm": 1.6741461912256008, "learning_rate": 4.669400032640165e-06, "loss": 0.1135, "step": 5884 }, { "epoch": 1.61, "grad_norm": 1.482529975352485, "learning_rate": 4.6679295911433215e-06, "loss": 0.0866, "step": 5885 }, { "epoch": 1.61, "grad_norm": 1.7964494809704494, "learning_rate": 4.666459178493232e-06, "loss": 0.1097, "step": 5886 }, { "epoch": 1.61, "grad_norm": 1.6443770655683032, "learning_rate": 4.664988794817637e-06, "loss": 0.1117, "step": 5887 }, { "epoch": 1.61, "grad_norm": 1.989703416048947, "learning_rate": 4.66351844024426e-06, "loss": 0.1316, "step": 5888 }, { "epoch": 1.61, "grad_norm": 1.8195507552922463, "learning_rate": 4.662048114900837e-06, "loss": 0.1068, "step": 5889 }, { "epoch": 1.61, "grad_norm": 1.8281156687574072, "learning_rate": 4.66057781891509e-06, "loss": 0.1125, "step": 5890 }, { "epoch": 1.61, "grad_norm": 1.8648349936389876, "learning_rate": 4.659107552414744e-06, "loss": 0.1343, "step": 5891 }, { "epoch": 1.61, "grad_norm": 1.8575779782882569, "learning_rate": 4.657637315527519e-06, "loss": 0.1106, "step": 5892 }, { "epoch": 1.61, "grad_norm": 1.8685792018228158, "learning_rate": 4.656167108381135e-06, "loss": 0.1264, "step": 5893 }, { "epoch": 1.61, "grad_norm": 1.9297831148882634, "learning_rate": 4.65469693110331e-06, "loss": 0.121, "step": 5894 }, { "epoch": 1.61, "grad_norm": 1.921258402958159, "learning_rate": 4.653226783821753e-06, "loss": 0.1128, "step": 5895 }, { "epoch": 1.61, "grad_norm": 1.5968018096518126, "learning_rate": 4.651756666664178e-06, "loss": 0.1043, "step": 5896 }, { "epoch": 1.61, "grad_norm": 1.7831648443247416, "learning_rate": 4.650286579758291e-06, "loss": 0.112, "step": 5897 }, { "epoch": 1.61, "grad_norm": 1.8452065215787334, "learning_rate": 4.6488165232318e-06, "loss": 0.1046, "step": 5898 }, { "epoch": 1.61, "grad_norm": 1.8055015068544278, "learning_rate": 4.647346497212406e-06, "loss": 0.1177, "step": 5899 }, { "epoch": 1.61, "grad_norm": 1.8288518671024598, "learning_rate": 4.6458765018278104e-06, "loss": 0.1025, "step": 5900 }, { "epoch": 1.61, "grad_norm": 1.7897386813041352, "learning_rate": 4.64440653720571e-06, "loss": 0.1027, "step": 5901 }, { "epoch": 1.61, "grad_norm": 1.7526472122341537, "learning_rate": 4.6429366034738005e-06, "loss": 0.1102, "step": 5902 }, { "epoch": 1.61, "grad_norm": 1.7037760862198994, "learning_rate": 4.641466700759772e-06, "loss": 0.1128, "step": 5903 }, { "epoch": 1.61, "grad_norm": 1.6130010370747099, "learning_rate": 4.6399968291913175e-06, "loss": 0.0972, "step": 5904 }, { "epoch": 1.61, "grad_norm": 1.5708719910355782, "learning_rate": 4.638526988896122e-06, "loss": 0.0998, "step": 5905 }, { "epoch": 1.61, "grad_norm": 1.8686336439053501, "learning_rate": 4.6370571800018695e-06, "loss": 0.124, "step": 5906 }, { "epoch": 1.61, "grad_norm": 1.733026578232837, "learning_rate": 4.635587402636241e-06, "loss": 0.0974, "step": 5907 }, { "epoch": 1.61, "grad_norm": 1.9655421975424643, "learning_rate": 4.634117656926917e-06, "loss": 0.1256, "step": 5908 }, { "epoch": 1.61, "grad_norm": 2.081120244565284, "learning_rate": 4.6326479430015715e-06, "loss": 0.1312, "step": 5909 }, { "epoch": 1.61, "grad_norm": 2.0682548297454897, "learning_rate": 4.631178260987879e-06, "loss": 0.126, "step": 5910 }, { "epoch": 1.61, "grad_norm": 1.7631987164388545, "learning_rate": 4.629708611013509e-06, "loss": 0.1147, "step": 5911 }, { "epoch": 1.61, "grad_norm": 1.7951499068402208, "learning_rate": 4.628238993206131e-06, "loss": 0.1108, "step": 5912 }, { "epoch": 1.61, "grad_norm": 1.737180170388322, "learning_rate": 4.6267694076934066e-06, "loss": 0.0889, "step": 5913 }, { "epoch": 1.61, "grad_norm": 1.952864510253775, "learning_rate": 4.625299854603e-06, "loss": 0.1317, "step": 5914 }, { "epoch": 1.61, "grad_norm": 2.0077823719376795, "learning_rate": 4.623830334062569e-06, "loss": 0.1296, "step": 5915 }, { "epoch": 1.62, "grad_norm": 1.8460052896720212, "learning_rate": 4.622360846199772e-06, "loss": 0.0962, "step": 5916 }, { "epoch": 1.62, "grad_norm": 1.7313392476325535, "learning_rate": 4.620891391142262e-06, "loss": 0.1101, "step": 5917 }, { "epoch": 1.62, "grad_norm": 1.8173263523567296, "learning_rate": 4.619421969017688e-06, "loss": 0.1241, "step": 5918 }, { "epoch": 1.62, "grad_norm": 1.5820178223866355, "learning_rate": 4.617952579953699e-06, "loss": 0.0992, "step": 5919 }, { "epoch": 1.62, "grad_norm": 1.8015485617719473, "learning_rate": 4.6164832240779405e-06, "loss": 0.1206, "step": 5920 }, { "epoch": 1.62, "grad_norm": 1.857087754955123, "learning_rate": 4.615013901518052e-06, "loss": 0.126, "step": 5921 }, { "epoch": 1.62, "grad_norm": 2.1065851785426304, "learning_rate": 4.613544612401677e-06, "loss": 0.1333, "step": 5922 }, { "epoch": 1.62, "grad_norm": 1.6800644092348458, "learning_rate": 4.612075356856447e-06, "loss": 0.1098, "step": 5923 }, { "epoch": 1.62, "grad_norm": 1.6796151338359744, "learning_rate": 4.61060613501e-06, "loss": 0.0964, "step": 5924 }, { "epoch": 1.62, "grad_norm": 1.8169100907761206, "learning_rate": 4.6091369469899634e-06, "loss": 0.1202, "step": 5925 }, { "epoch": 1.62, "grad_norm": 1.5137748084633729, "learning_rate": 4.6076677929239656e-06, "loss": 0.0873, "step": 5926 }, { "epoch": 1.62, "grad_norm": 1.764877837925718, "learning_rate": 4.606198672939628e-06, "loss": 0.1029, "step": 5927 }, { "epoch": 1.62, "grad_norm": 1.7179899907351193, "learning_rate": 4.6047295871645785e-06, "loss": 0.1062, "step": 5928 }, { "epoch": 1.62, "grad_norm": 1.789792340906551, "learning_rate": 4.603260535726432e-06, "loss": 0.1103, "step": 5929 }, { "epoch": 1.62, "grad_norm": 1.6541793464139767, "learning_rate": 4.6017915187528036e-06, "loss": 0.0951, "step": 5930 }, { "epoch": 1.62, "grad_norm": 1.725131736538696, "learning_rate": 4.6003225363713065e-06, "loss": 0.0984, "step": 5931 }, { "epoch": 1.62, "grad_norm": 2.101253603030808, "learning_rate": 4.598853588709552e-06, "loss": 0.1488, "step": 5932 }, { "epoch": 1.62, "grad_norm": 1.9099775590148313, "learning_rate": 4.597384675895142e-06, "loss": 0.1193, "step": 5933 }, { "epoch": 1.62, "grad_norm": 1.842068850354602, "learning_rate": 4.595915798055686e-06, "loss": 0.124, "step": 5934 }, { "epoch": 1.62, "grad_norm": 1.714918250138469, "learning_rate": 4.594446955318781e-06, "loss": 0.0961, "step": 5935 }, { "epoch": 1.62, "grad_norm": 1.9167393976566174, "learning_rate": 4.592978147812026e-06, "loss": 0.1113, "step": 5936 }, { "epoch": 1.62, "grad_norm": 1.613239502967477, "learning_rate": 4.591509375663014e-06, "loss": 0.1009, "step": 5937 }, { "epoch": 1.62, "grad_norm": 2.0809948036082773, "learning_rate": 4.590040638999338e-06, "loss": 0.134, "step": 5938 }, { "epoch": 1.62, "grad_norm": 1.6574741298949107, "learning_rate": 4.588571937948583e-06, "loss": 0.1062, "step": 5939 }, { "epoch": 1.62, "grad_norm": 1.662263012786599, "learning_rate": 4.587103272638339e-06, "loss": 0.1094, "step": 5940 }, { "epoch": 1.62, "grad_norm": 2.0448275718192277, "learning_rate": 4.585634643196185e-06, "loss": 0.0957, "step": 5941 }, { "epoch": 1.62, "grad_norm": 1.8607738921100478, "learning_rate": 4.584166049749701e-06, "loss": 0.1167, "step": 5942 }, { "epoch": 1.62, "grad_norm": 1.8788623559379083, "learning_rate": 4.582697492426461e-06, "loss": 0.1229, "step": 5943 }, { "epoch": 1.62, "grad_norm": 1.7507636599053813, "learning_rate": 4.581228971354042e-06, "loss": 0.117, "step": 5944 }, { "epoch": 1.62, "grad_norm": 1.597577253295051, "learning_rate": 4.579760486660006e-06, "loss": 0.0977, "step": 5945 }, { "epoch": 1.62, "grad_norm": 1.8037949381340144, "learning_rate": 4.578292038471928e-06, "loss": 0.0961, "step": 5946 }, { "epoch": 1.62, "grad_norm": 1.730924550344127, "learning_rate": 4.576823626917365e-06, "loss": 0.1108, "step": 5947 }, { "epoch": 1.62, "grad_norm": 1.750923119483021, "learning_rate": 4.575355252123881e-06, "loss": 0.1078, "step": 5948 }, { "epoch": 1.62, "grad_norm": 1.6578763482900079, "learning_rate": 4.573886914219031e-06, "loss": 0.1073, "step": 5949 }, { "epoch": 1.62, "grad_norm": 1.7506037793493139, "learning_rate": 4.572418613330368e-06, "loss": 0.1227, "step": 5950 }, { "epoch": 1.62, "grad_norm": 1.8480968311463317, "learning_rate": 4.570950349585442e-06, "loss": 0.1203, "step": 5951 }, { "epoch": 1.62, "grad_norm": 1.724518155793137, "learning_rate": 4.569482123111804e-06, "loss": 0.1009, "step": 5952 }, { "epoch": 1.63, "grad_norm": 2.0633310051106135, "learning_rate": 4.568013934036993e-06, "loss": 0.1077, "step": 5953 }, { "epoch": 1.63, "grad_norm": 1.9928162663049163, "learning_rate": 4.566545782488554e-06, "loss": 0.1129, "step": 5954 }, { "epoch": 1.63, "grad_norm": 1.8053118334412859, "learning_rate": 4.56507766859402e-06, "loss": 0.1105, "step": 5955 }, { "epoch": 1.63, "grad_norm": 1.4233780665855478, "learning_rate": 4.563609592480931e-06, "loss": 0.0878, "step": 5956 }, { "epoch": 1.63, "grad_norm": 1.8812415416352042, "learning_rate": 4.562141554276811e-06, "loss": 0.1193, "step": 5957 }, { "epoch": 1.63, "grad_norm": 1.6792852742555906, "learning_rate": 4.5606735541091925e-06, "loss": 0.1015, "step": 5958 }, { "epoch": 1.63, "grad_norm": 2.072203027140658, "learning_rate": 4.559205592105599e-06, "loss": 0.1069, "step": 5959 }, { "epoch": 1.63, "grad_norm": 1.7459184580478888, "learning_rate": 4.557737668393551e-06, "loss": 0.1041, "step": 5960 }, { "epoch": 1.63, "grad_norm": 1.5168771234380187, "learning_rate": 4.556269783100565e-06, "loss": 0.0768, "step": 5961 }, { "epoch": 1.63, "grad_norm": 1.991111831668939, "learning_rate": 4.554801936354157e-06, "loss": 0.1244, "step": 5962 }, { "epoch": 1.63, "grad_norm": 1.9807953753972587, "learning_rate": 4.553334128281836e-06, "loss": 0.1413, "step": 5963 }, { "epoch": 1.63, "grad_norm": 1.9377278658324935, "learning_rate": 4.551866359011114e-06, "loss": 0.1249, "step": 5964 }, { "epoch": 1.63, "grad_norm": 2.2586336573239487, "learning_rate": 4.550398628669489e-06, "loss": 0.1288, "step": 5965 }, { "epoch": 1.63, "grad_norm": 1.6957542631992188, "learning_rate": 4.548930937384466e-06, "loss": 0.0984, "step": 5966 }, { "epoch": 1.63, "grad_norm": 2.1670118422770734, "learning_rate": 4.547463285283542e-06, "loss": 0.1399, "step": 5967 }, { "epoch": 1.63, "grad_norm": 1.5530313380967515, "learning_rate": 4.54599567249421e-06, "loss": 0.0844, "step": 5968 }, { "epoch": 1.63, "grad_norm": 1.6775201891284022, "learning_rate": 4.544528099143961e-06, "loss": 0.1158, "step": 5969 }, { "epoch": 1.63, "grad_norm": 1.9541166998928459, "learning_rate": 4.543060565360284e-06, "loss": 0.1293, "step": 5970 }, { "epoch": 1.63, "grad_norm": 1.8419156432081785, "learning_rate": 4.541593071270658e-06, "loss": 0.1118, "step": 5971 }, { "epoch": 1.63, "grad_norm": 1.5742995758436404, "learning_rate": 4.54012561700257e-06, "loss": 0.1118, "step": 5972 }, { "epoch": 1.63, "grad_norm": 1.930682830432296, "learning_rate": 4.53865820268349e-06, "loss": 0.1184, "step": 5973 }, { "epoch": 1.63, "grad_norm": 1.8406021395369825, "learning_rate": 4.537190828440898e-06, "loss": 0.1266, "step": 5974 }, { "epoch": 1.63, "grad_norm": 1.5844384959033837, "learning_rate": 4.535723494402258e-06, "loss": 0.1122, "step": 5975 }, { "epoch": 1.63, "grad_norm": 1.6337274901187584, "learning_rate": 4.534256200695042e-06, "loss": 0.1107, "step": 5976 }, { "epoch": 1.63, "grad_norm": 1.5682099887127616, "learning_rate": 4.532788947446706e-06, "loss": 0.0921, "step": 5977 }, { "epoch": 1.63, "grad_norm": 1.6174022104201136, "learning_rate": 4.531321734784717e-06, "loss": 0.1043, "step": 5978 }, { "epoch": 1.63, "grad_norm": 1.6407503054264556, "learning_rate": 4.529854562836525e-06, "loss": 0.095, "step": 5979 }, { "epoch": 1.63, "grad_norm": 1.4033207707062956, "learning_rate": 4.528387431729587e-06, "loss": 0.0898, "step": 5980 }, { "epoch": 1.63, "grad_norm": 2.1389391281939623, "learning_rate": 4.5269203415913465e-06, "loss": 0.1601, "step": 5981 }, { "epoch": 1.63, "grad_norm": 1.6035528289081962, "learning_rate": 4.525453292549255e-06, "loss": 0.1102, "step": 5982 }, { "epoch": 1.63, "grad_norm": 1.7262380214177933, "learning_rate": 4.523986284730747e-06, "loss": 0.1108, "step": 5983 }, { "epoch": 1.63, "grad_norm": 1.6105912783085636, "learning_rate": 4.5225193182632675e-06, "loss": 0.0968, "step": 5984 }, { "epoch": 1.63, "grad_norm": 1.710759912978609, "learning_rate": 4.5210523932742475e-06, "loss": 0.1038, "step": 5985 }, { "epoch": 1.63, "grad_norm": 1.5727984318899904, "learning_rate": 4.5195855098911165e-06, "loss": 0.0878, "step": 5986 }, { "epoch": 1.63, "grad_norm": 1.901003196193916, "learning_rate": 4.518118668241306e-06, "loss": 0.1112, "step": 5987 }, { "epoch": 1.63, "grad_norm": 1.8374057805227946, "learning_rate": 4.516651868452236e-06, "loss": 0.1118, "step": 5988 }, { "epoch": 1.63, "grad_norm": 1.8069375972387882, "learning_rate": 4.515185110651328e-06, "loss": 0.1082, "step": 5989 }, { "epoch": 1.64, "grad_norm": 1.7369838707424228, "learning_rate": 4.513718394965998e-06, "loss": 0.1022, "step": 5990 }, { "epoch": 1.64, "grad_norm": 1.8609155947739908, "learning_rate": 4.512251721523659e-06, "loss": 0.109, "step": 5991 }, { "epoch": 1.64, "grad_norm": 1.6686200305283587, "learning_rate": 4.510785090451719e-06, "loss": 0.0918, "step": 5992 }, { "epoch": 1.64, "grad_norm": 2.2044515457404033, "learning_rate": 4.509318501877586e-06, "loss": 0.1423, "step": 5993 }, { "epoch": 1.64, "grad_norm": 1.7281595478403926, "learning_rate": 4.507851955928659e-06, "loss": 0.1036, "step": 5994 }, { "epoch": 1.64, "grad_norm": 1.9442202317120507, "learning_rate": 4.506385452732338e-06, "loss": 0.1218, "step": 5995 }, { "epoch": 1.64, "grad_norm": 2.1510209923823966, "learning_rate": 4.5049189924160144e-06, "loss": 0.1091, "step": 5996 }, { "epoch": 1.64, "grad_norm": 1.7095541956394982, "learning_rate": 4.5034525751070825e-06, "loss": 0.1061, "step": 5997 }, { "epoch": 1.64, "grad_norm": 1.890543216708684, "learning_rate": 4.501986200932924e-06, "loss": 0.1172, "step": 5998 }, { "epoch": 1.64, "grad_norm": 1.8049146671292007, "learning_rate": 4.500519870020928e-06, "loss": 0.1045, "step": 5999 }, { "epoch": 1.64, "grad_norm": 1.9778979271571064, "learning_rate": 4.499053582498469e-06, "loss": 0.1336, "step": 6000 }, { "epoch": 1.64, "grad_norm": 1.8396988897962223, "learning_rate": 4.497587338492926e-06, "loss": 0.1063, "step": 6001 }, { "epoch": 1.64, "grad_norm": 1.5298610093213953, "learning_rate": 4.496121138131667e-06, "loss": 0.0876, "step": 6002 }, { "epoch": 1.64, "grad_norm": 1.6735007896285592, "learning_rate": 4.494654981542064e-06, "loss": 0.1017, "step": 6003 }, { "epoch": 1.64, "grad_norm": 1.5797400770801575, "learning_rate": 4.493188868851477e-06, "loss": 0.0955, "step": 6004 }, { "epoch": 1.64, "grad_norm": 1.9734114454759775, "learning_rate": 4.491722800187271e-06, "loss": 0.1278, "step": 6005 }, { "epoch": 1.64, "grad_norm": 2.057493024741134, "learning_rate": 4.4902567756767976e-06, "loss": 0.1146, "step": 6006 }, { "epoch": 1.64, "grad_norm": 1.8276332986971147, "learning_rate": 4.488790795447414e-06, "loss": 0.1047, "step": 6007 }, { "epoch": 1.64, "grad_norm": 1.879961989908558, "learning_rate": 4.487324859626465e-06, "loss": 0.1119, "step": 6008 }, { "epoch": 1.64, "grad_norm": 1.963071487620247, "learning_rate": 4.485858968341299e-06, "loss": 0.1252, "step": 6009 }, { "epoch": 1.64, "grad_norm": 1.7058874521568435, "learning_rate": 4.484393121719253e-06, "loss": 0.1192, "step": 6010 }, { "epoch": 1.64, "grad_norm": 1.8168198587149336, "learning_rate": 4.482927319887669e-06, "loss": 0.1037, "step": 6011 }, { "epoch": 1.64, "grad_norm": 1.6436685829908715, "learning_rate": 4.481461562973877e-06, "loss": 0.0992, "step": 6012 }, { "epoch": 1.64, "grad_norm": 1.6811039330883193, "learning_rate": 4.479995851105209e-06, "loss": 0.1003, "step": 6013 }, { "epoch": 1.64, "grad_norm": 1.6460242189581848, "learning_rate": 4.478530184408987e-06, "loss": 0.1019, "step": 6014 }, { "epoch": 1.64, "grad_norm": 1.9694314273719495, "learning_rate": 4.477064563012536e-06, "loss": 0.1088, "step": 6015 }, { "epoch": 1.64, "grad_norm": 1.9429413119297423, "learning_rate": 4.4755989870431705e-06, "loss": 0.1264, "step": 6016 }, { "epoch": 1.64, "grad_norm": 1.588587431121469, "learning_rate": 4.474133456628208e-06, "loss": 0.1072, "step": 6017 }, { "epoch": 1.64, "grad_norm": 1.772559048295154, "learning_rate": 4.472667971894955e-06, "loss": 0.1039, "step": 6018 }, { "epoch": 1.64, "grad_norm": 1.4110562646523765, "learning_rate": 4.471202532970719e-06, "loss": 0.085, "step": 6019 }, { "epoch": 1.64, "grad_norm": 1.932895919774141, "learning_rate": 4.469737139982801e-06, "loss": 0.1321, "step": 6020 }, { "epoch": 1.64, "grad_norm": 1.8008064895673273, "learning_rate": 4.4682717930585e-06, "loss": 0.1043, "step": 6021 }, { "epoch": 1.64, "grad_norm": 1.8801228819357656, "learning_rate": 4.466806492325106e-06, "loss": 0.1191, "step": 6022 }, { "epoch": 1.64, "grad_norm": 1.732623795018266, "learning_rate": 4.465341237909915e-06, "loss": 0.1039, "step": 6023 }, { "epoch": 1.64, "grad_norm": 1.8151171943216724, "learning_rate": 4.463876029940207e-06, "loss": 0.1131, "step": 6024 }, { "epoch": 1.64, "grad_norm": 2.02751617743123, "learning_rate": 4.462410868543268e-06, "loss": 0.1294, "step": 6025 }, { "epoch": 1.65, "grad_norm": 1.8267941223455404, "learning_rate": 4.460945753846373e-06, "loss": 0.1185, "step": 6026 }, { "epoch": 1.65, "grad_norm": 1.8182058831336576, "learning_rate": 4.459480685976798e-06, "loss": 0.1148, "step": 6027 }, { "epoch": 1.65, "grad_norm": 1.9317619077462784, "learning_rate": 4.458015665061807e-06, "loss": 0.1168, "step": 6028 }, { "epoch": 1.65, "grad_norm": 2.014717354376884, "learning_rate": 4.456550691228673e-06, "loss": 0.1217, "step": 6029 }, { "epoch": 1.65, "grad_norm": 1.5455633799754318, "learning_rate": 4.455085764604653e-06, "loss": 0.0993, "step": 6030 }, { "epoch": 1.65, "grad_norm": 1.7226404025610575, "learning_rate": 4.453620885317006e-06, "loss": 0.1102, "step": 6031 }, { "epoch": 1.65, "grad_norm": 1.6968506397729772, "learning_rate": 4.452156053492983e-06, "loss": 0.1122, "step": 6032 }, { "epoch": 1.65, "grad_norm": 1.775571699121647, "learning_rate": 4.450691269259837e-06, "loss": 0.1066, "step": 6033 }, { "epoch": 1.65, "grad_norm": 2.0232766543832352, "learning_rate": 4.449226532744807e-06, "loss": 0.1276, "step": 6034 }, { "epoch": 1.65, "grad_norm": 1.58766934271213, "learning_rate": 4.4477618440751395e-06, "loss": 0.0965, "step": 6035 }, { "epoch": 1.65, "grad_norm": 1.8608029797363759, "learning_rate": 4.4462972033780675e-06, "loss": 0.1195, "step": 6036 }, { "epoch": 1.65, "grad_norm": 1.68656101615884, "learning_rate": 4.444832610780827e-06, "loss": 0.1114, "step": 6037 }, { "epoch": 1.65, "grad_norm": 1.661519549708528, "learning_rate": 4.443368066410641e-06, "loss": 0.1054, "step": 6038 }, { "epoch": 1.65, "grad_norm": 1.7118041490560474, "learning_rate": 4.441903570394739e-06, "loss": 0.1214, "step": 6039 }, { "epoch": 1.65, "grad_norm": 2.0135784324447377, "learning_rate": 4.4404391228603366e-06, "loss": 0.125, "step": 6040 }, { "epoch": 1.65, "grad_norm": 1.8327260817054687, "learning_rate": 4.438974723934654e-06, "loss": 0.11, "step": 6041 }, { "epoch": 1.65, "grad_norm": 1.6715937331501853, "learning_rate": 4.437510373744897e-06, "loss": 0.0945, "step": 6042 }, { "epoch": 1.65, "grad_norm": 1.7992523445708128, "learning_rate": 4.436046072418278e-06, "loss": 0.1165, "step": 6043 }, { "epoch": 1.65, "grad_norm": 1.7225612603246383, "learning_rate": 4.4345818200819974e-06, "loss": 0.103, "step": 6044 }, { "epoch": 1.65, "grad_norm": 1.8926515529643086, "learning_rate": 4.433117616863255e-06, "loss": 0.1222, "step": 6045 }, { "epoch": 1.65, "grad_norm": 1.5403042936333862, "learning_rate": 4.4316534628892425e-06, "loss": 0.0907, "step": 6046 }, { "epoch": 1.65, "grad_norm": 1.9478355059591983, "learning_rate": 4.430189358287155e-06, "loss": 0.1237, "step": 6047 }, { "epoch": 1.65, "grad_norm": 1.8144992775176658, "learning_rate": 4.4287253031841725e-06, "loss": 0.1159, "step": 6048 }, { "epoch": 1.65, "grad_norm": 1.6139718530573681, "learning_rate": 4.427261297707482e-06, "loss": 0.1061, "step": 6049 }, { "epoch": 1.65, "grad_norm": 1.840463539097827, "learning_rate": 4.425797341984258e-06, "loss": 0.1077, "step": 6050 }, { "epoch": 1.65, "grad_norm": 1.5731634796924834, "learning_rate": 4.424333436141675e-06, "loss": 0.0893, "step": 6051 }, { "epoch": 1.65, "grad_norm": 1.4412368540337546, "learning_rate": 4.422869580306897e-06, "loss": 0.0939, "step": 6052 }, { "epoch": 1.65, "grad_norm": 1.6443479368626843, "learning_rate": 4.421405774607096e-06, "loss": 0.0992, "step": 6053 }, { "epoch": 1.65, "grad_norm": 1.7529625038341259, "learning_rate": 4.419942019169424e-06, "loss": 0.1086, "step": 6054 }, { "epoch": 1.65, "grad_norm": 1.7095652375383714, "learning_rate": 4.418478314121043e-06, "loss": 0.1051, "step": 6055 }, { "epoch": 1.65, "grad_norm": 1.8300389945089752, "learning_rate": 4.4170146595891006e-06, "loss": 0.1068, "step": 6056 }, { "epoch": 1.65, "grad_norm": 1.7265692656538434, "learning_rate": 4.415551055700745e-06, "loss": 0.1175, "step": 6057 }, { "epoch": 1.65, "grad_norm": 1.9590154169254117, "learning_rate": 4.414087502583116e-06, "loss": 0.1112, "step": 6058 }, { "epoch": 1.65, "grad_norm": 1.7273431418739582, "learning_rate": 4.4126240003633565e-06, "loss": 0.1119, "step": 6059 }, { "epoch": 1.65, "grad_norm": 2.0566043145868207, "learning_rate": 4.411160549168595e-06, "loss": 0.1356, "step": 6060 }, { "epoch": 1.65, "grad_norm": 2.041543829836337, "learning_rate": 4.409697149125964e-06, "loss": 0.1454, "step": 6061 }, { "epoch": 1.65, "grad_norm": 1.5205109912013173, "learning_rate": 4.408233800362586e-06, "loss": 0.0899, "step": 6062 }, { "epoch": 1.66, "grad_norm": 1.6649255006118326, "learning_rate": 4.406770503005584e-06, "loss": 0.1065, "step": 6063 }, { "epoch": 1.66, "grad_norm": 2.0694680538071646, "learning_rate": 4.405307257182069e-06, "loss": 0.1441, "step": 6064 }, { "epoch": 1.66, "grad_norm": 1.774343052389184, "learning_rate": 4.403844063019159e-06, "loss": 0.1306, "step": 6065 }, { "epoch": 1.66, "grad_norm": 1.5592185909555658, "learning_rate": 4.402380920643954e-06, "loss": 0.1083, "step": 6066 }, { "epoch": 1.66, "grad_norm": 1.6371961749187038, "learning_rate": 4.400917830183561e-06, "loss": 0.1034, "step": 6067 }, { "epoch": 1.66, "grad_norm": 1.755101902647616, "learning_rate": 4.399454791765076e-06, "loss": 0.0922, "step": 6068 }, { "epoch": 1.66, "grad_norm": 1.868333138884964, "learning_rate": 4.397991805515592e-06, "loss": 0.1285, "step": 6069 }, { "epoch": 1.66, "grad_norm": 1.7689899627588024, "learning_rate": 4.3965288715621965e-06, "loss": 0.1063, "step": 6070 }, { "epoch": 1.66, "grad_norm": 1.7490369364710514, "learning_rate": 4.395065990031979e-06, "loss": 0.1057, "step": 6071 }, { "epoch": 1.66, "grad_norm": 1.7585700914928077, "learning_rate": 4.3936031610520126e-06, "loss": 0.1136, "step": 6072 }, { "epoch": 1.66, "grad_norm": 1.8462882337026858, "learning_rate": 4.3921403847493775e-06, "loss": 0.095, "step": 6073 }, { "epoch": 1.66, "grad_norm": 2.1086245895975595, "learning_rate": 4.39067766125114e-06, "loss": 0.1357, "step": 6074 }, { "epoch": 1.66, "grad_norm": 1.4771881857171978, "learning_rate": 4.389214990684369e-06, "loss": 0.0854, "step": 6075 }, { "epoch": 1.66, "grad_norm": 1.780137085344616, "learning_rate": 4.387752373176123e-06, "loss": 0.1048, "step": 6076 }, { "epoch": 1.66, "grad_norm": 1.6358329176303328, "learning_rate": 4.386289808853462e-06, "loss": 0.1087, "step": 6077 }, { "epoch": 1.66, "grad_norm": 1.8109212818991554, "learning_rate": 4.384827297843437e-06, "loss": 0.1139, "step": 6078 }, { "epoch": 1.66, "grad_norm": 1.6343558317055689, "learning_rate": 4.383364840273094e-06, "loss": 0.0988, "step": 6079 }, { "epoch": 1.66, "grad_norm": 1.7288117879231606, "learning_rate": 4.381902436269479e-06, "loss": 0.1084, "step": 6080 }, { "epoch": 1.66, "grad_norm": 1.688601538329786, "learning_rate": 4.380440085959625e-06, "loss": 0.1109, "step": 6081 }, { "epoch": 1.66, "grad_norm": 1.6878755192456445, "learning_rate": 4.3789777894705706e-06, "loss": 0.1106, "step": 6082 }, { "epoch": 1.66, "grad_norm": 1.8208732034313206, "learning_rate": 4.377515546929341e-06, "loss": 0.1225, "step": 6083 }, { "epoch": 1.66, "grad_norm": 1.8024846244095072, "learning_rate": 4.3760533584629636e-06, "loss": 0.1154, "step": 6084 }, { "epoch": 1.66, "grad_norm": 1.629381080009447, "learning_rate": 4.374591224198455e-06, "loss": 0.0998, "step": 6085 }, { "epoch": 1.66, "grad_norm": 1.7362893865488154, "learning_rate": 4.373129144262832e-06, "loss": 0.1036, "step": 6086 }, { "epoch": 1.66, "grad_norm": 1.9259703915171458, "learning_rate": 4.371667118783101e-06, "loss": 0.1179, "step": 6087 }, { "epoch": 1.66, "grad_norm": 1.8622867567014016, "learning_rate": 4.370205147886273e-06, "loss": 0.1158, "step": 6088 }, { "epoch": 1.66, "grad_norm": 1.8340059736349363, "learning_rate": 4.3687432316993434e-06, "loss": 0.1111, "step": 6089 }, { "epoch": 1.66, "grad_norm": 1.7043827099226356, "learning_rate": 4.367281370349311e-06, "loss": 0.0984, "step": 6090 }, { "epoch": 1.66, "grad_norm": 1.81928292081651, "learning_rate": 4.365819563963166e-06, "loss": 0.1059, "step": 6091 }, { "epoch": 1.66, "grad_norm": 1.7309597713194826, "learning_rate": 4.364357812667894e-06, "loss": 0.0958, "step": 6092 }, { "epoch": 1.66, "grad_norm": 1.9684908703575503, "learning_rate": 4.362896116590475e-06, "loss": 0.1428, "step": 6093 }, { "epoch": 1.66, "grad_norm": 1.5332570645171262, "learning_rate": 4.361434475857891e-06, "loss": 0.0997, "step": 6094 }, { "epoch": 1.66, "grad_norm": 1.7070096177973109, "learning_rate": 4.3599728905971086e-06, "loss": 0.1163, "step": 6095 }, { "epoch": 1.66, "grad_norm": 1.8725119737451372, "learning_rate": 4.358511360935097e-06, "loss": 0.1083, "step": 6096 }, { "epoch": 1.66, "grad_norm": 1.7176913733880348, "learning_rate": 4.357049886998818e-06, "loss": 0.1146, "step": 6097 }, { "epoch": 1.66, "grad_norm": 1.9151234736466296, "learning_rate": 4.35558846891523e-06, "loss": 0.1331, "step": 6098 }, { "epoch": 1.67, "grad_norm": 1.6407195234162124, "learning_rate": 4.354127106811282e-06, "loss": 0.1041, "step": 6099 }, { "epoch": 1.67, "grad_norm": 1.6552260683068514, "learning_rate": 4.352665800813926e-06, "loss": 0.1038, "step": 6100 }, { "epoch": 1.67, "grad_norm": 1.6879649788991324, "learning_rate": 4.351204551050102e-06, "loss": 0.0989, "step": 6101 }, { "epoch": 1.67, "grad_norm": 1.6656070110711363, "learning_rate": 4.349743357646751e-06, "loss": 0.1008, "step": 6102 }, { "epoch": 1.67, "grad_norm": 1.7620849376688874, "learning_rate": 4.348282220730802e-06, "loss": 0.1197, "step": 6103 }, { "epoch": 1.67, "grad_norm": 2.1230782299117936, "learning_rate": 4.346821140429186e-06, "loss": 0.1236, "step": 6104 }, { "epoch": 1.67, "grad_norm": 1.9394631374902518, "learning_rate": 4.3453601168688225e-06, "loss": 0.1362, "step": 6105 }, { "epoch": 1.67, "grad_norm": 1.7893323098127385, "learning_rate": 4.343899150176635e-06, "loss": 0.1137, "step": 6106 }, { "epoch": 1.67, "grad_norm": 1.5678627477445248, "learning_rate": 4.342438240479533e-06, "loss": 0.0974, "step": 6107 }, { "epoch": 1.67, "grad_norm": 1.5471340414745498, "learning_rate": 4.340977387904427e-06, "loss": 0.0877, "step": 6108 }, { "epoch": 1.67, "grad_norm": 1.6239086620667968, "learning_rate": 4.339516592578218e-06, "loss": 0.0973, "step": 6109 }, { "epoch": 1.67, "grad_norm": 1.617461885183093, "learning_rate": 4.3380558546278075e-06, "loss": 0.0876, "step": 6110 }, { "epoch": 1.67, "grad_norm": 1.5980019663907559, "learning_rate": 4.336595174180085e-06, "loss": 0.1074, "step": 6111 }, { "epoch": 1.67, "grad_norm": 1.666743744786788, "learning_rate": 4.335134551361942e-06, "loss": 0.0981, "step": 6112 }, { "epoch": 1.67, "grad_norm": 1.848565637864274, "learning_rate": 4.333673986300262e-06, "loss": 0.1312, "step": 6113 }, { "epoch": 1.67, "grad_norm": 2.090829653417758, "learning_rate": 4.332213479121922e-06, "loss": 0.1214, "step": 6114 }, { "epoch": 1.67, "grad_norm": 2.0550833350570934, "learning_rate": 4.330753029953796e-06, "loss": 0.1177, "step": 6115 }, { "epoch": 1.67, "grad_norm": 1.93060655947151, "learning_rate": 4.329292638922753e-06, "loss": 0.1084, "step": 6116 }, { "epoch": 1.67, "grad_norm": 1.94170093657727, "learning_rate": 4.327832306155652e-06, "loss": 0.1227, "step": 6117 }, { "epoch": 1.67, "grad_norm": 1.7468851736640723, "learning_rate": 4.326372031779359e-06, "loss": 0.1037, "step": 6118 }, { "epoch": 1.67, "grad_norm": 1.6982515206555788, "learning_rate": 4.32491181592072e-06, "loss": 0.1048, "step": 6119 }, { "epoch": 1.67, "grad_norm": 1.5755799510466173, "learning_rate": 4.323451658706587e-06, "loss": 0.0882, "step": 6120 }, { "epoch": 1.67, "grad_norm": 1.703143848448169, "learning_rate": 4.321991560263802e-06, "loss": 0.0975, "step": 6121 }, { "epoch": 1.67, "grad_norm": 1.9970759020855016, "learning_rate": 4.320531520719203e-06, "loss": 0.1234, "step": 6122 }, { "epoch": 1.67, "grad_norm": 1.6293693062088537, "learning_rate": 4.319071540199621e-06, "loss": 0.1132, "step": 6123 }, { "epoch": 1.67, "grad_norm": 1.9609684899922433, "learning_rate": 4.317611618831888e-06, "loss": 0.1107, "step": 6124 }, { "epoch": 1.67, "grad_norm": 1.6835006063149112, "learning_rate": 4.316151756742821e-06, "loss": 0.11, "step": 6125 }, { "epoch": 1.67, "grad_norm": 1.7601749229382702, "learning_rate": 4.314691954059242e-06, "loss": 0.1145, "step": 6126 }, { "epoch": 1.67, "grad_norm": 1.6172299395662388, "learning_rate": 4.313232210907959e-06, "loss": 0.0882, "step": 6127 }, { "epoch": 1.67, "grad_norm": 1.5784701854701733, "learning_rate": 4.311772527415784e-06, "loss": 0.1148, "step": 6128 }, { "epoch": 1.67, "grad_norm": 1.6543611196364094, "learning_rate": 4.310312903709513e-06, "loss": 0.103, "step": 6129 }, { "epoch": 1.67, "grad_norm": 1.8485781630595715, "learning_rate": 4.308853339915949e-06, "loss": 0.1096, "step": 6130 }, { "epoch": 1.67, "grad_norm": 1.5805406977936411, "learning_rate": 4.307393836161877e-06, "loss": 0.0969, "step": 6131 }, { "epoch": 1.67, "grad_norm": 1.953262426161697, "learning_rate": 4.305934392574088e-06, "loss": 0.1355, "step": 6132 }, { "epoch": 1.67, "grad_norm": 1.6764775999032013, "learning_rate": 4.304475009279361e-06, "loss": 0.0969, "step": 6133 }, { "epoch": 1.67, "grad_norm": 1.5674610518184402, "learning_rate": 4.303015686404473e-06, "loss": 0.0987, "step": 6134 }, { "epoch": 1.67, "grad_norm": 1.7838356928953378, "learning_rate": 4.301556424076191e-06, "loss": 0.106, "step": 6135 }, { "epoch": 1.68, "grad_norm": 1.8373877789563595, "learning_rate": 4.300097222421287e-06, "loss": 0.1194, "step": 6136 }, { "epoch": 1.68, "grad_norm": 1.6884284285996094, "learning_rate": 4.298638081566513e-06, "loss": 0.0857, "step": 6137 }, { "epoch": 1.68, "grad_norm": 1.621829373862572, "learning_rate": 4.297179001638629e-06, "loss": 0.0988, "step": 6138 }, { "epoch": 1.68, "grad_norm": 1.922966772177148, "learning_rate": 4.295719982764382e-06, "loss": 0.1172, "step": 6139 }, { "epoch": 1.68, "grad_norm": 2.0303617599944297, "learning_rate": 4.294261025070519e-06, "loss": 0.1256, "step": 6140 }, { "epoch": 1.68, "grad_norm": 1.8962979237111295, "learning_rate": 4.292802128683773e-06, "loss": 0.1301, "step": 6141 }, { "epoch": 1.68, "grad_norm": 1.8550087231272105, "learning_rate": 4.291343293730885e-06, "loss": 0.1094, "step": 6142 }, { "epoch": 1.68, "grad_norm": 1.62859130809329, "learning_rate": 4.289884520338577e-06, "loss": 0.1074, "step": 6143 }, { "epoch": 1.68, "grad_norm": 1.70412936651213, "learning_rate": 4.2884258086335755e-06, "loss": 0.0997, "step": 6144 }, { "epoch": 1.68, "grad_norm": 1.7740179603308133, "learning_rate": 4.286967158742596e-06, "loss": 0.1012, "step": 6145 }, { "epoch": 1.68, "grad_norm": 1.6360805729378918, "learning_rate": 4.285508570792351e-06, "loss": 0.112, "step": 6146 }, { "epoch": 1.68, "grad_norm": 1.6797344581401086, "learning_rate": 4.2840500449095455e-06, "loss": 0.1103, "step": 6147 }, { "epoch": 1.68, "grad_norm": 1.7092724354614606, "learning_rate": 4.282591581220886e-06, "loss": 0.1031, "step": 6148 }, { "epoch": 1.68, "grad_norm": 1.7744060713396335, "learning_rate": 4.281133179853061e-06, "loss": 0.1156, "step": 6149 }, { "epoch": 1.68, "grad_norm": 1.9371245417592706, "learning_rate": 4.279674840932767e-06, "loss": 0.1298, "step": 6150 }, { "epoch": 1.68, "grad_norm": 1.5977810422862884, "learning_rate": 4.278216564586687e-06, "loss": 0.1094, "step": 6151 }, { "epoch": 1.68, "grad_norm": 1.74495715300954, "learning_rate": 4.2767583509415e-06, "loss": 0.1144, "step": 6152 }, { "epoch": 1.68, "grad_norm": 1.8100158669145083, "learning_rate": 4.275300200123879e-06, "loss": 0.1095, "step": 6153 }, { "epoch": 1.68, "grad_norm": 1.7526332567600043, "learning_rate": 4.2738421122604964e-06, "loss": 0.1137, "step": 6154 }, { "epoch": 1.68, "grad_norm": 1.7975184162380935, "learning_rate": 4.272384087478011e-06, "loss": 0.1126, "step": 6155 }, { "epoch": 1.68, "grad_norm": 1.7337847735619645, "learning_rate": 4.270926125903085e-06, "loss": 0.106, "step": 6156 }, { "epoch": 1.68, "grad_norm": 1.7744175740297166, "learning_rate": 4.2694682276623675e-06, "loss": 0.1096, "step": 6157 }, { "epoch": 1.68, "grad_norm": 1.6513612086909404, "learning_rate": 4.268010392882506e-06, "loss": 0.1114, "step": 6158 }, { "epoch": 1.68, "grad_norm": 1.8202119447634264, "learning_rate": 4.266552621690141e-06, "loss": 0.118, "step": 6159 }, { "epoch": 1.68, "grad_norm": 1.969437549994304, "learning_rate": 4.2650949142119116e-06, "loss": 0.1116, "step": 6160 }, { "epoch": 1.68, "grad_norm": 2.0954889027229138, "learning_rate": 4.2636372705744425e-06, "loss": 0.1187, "step": 6161 }, { "epoch": 1.68, "grad_norm": 1.5991152732132552, "learning_rate": 4.262179690904363e-06, "loss": 0.1055, "step": 6162 }, { "epoch": 1.68, "grad_norm": 1.6651971914598611, "learning_rate": 4.26072217532829e-06, "loss": 0.1009, "step": 6163 }, { "epoch": 1.68, "grad_norm": 1.8137942352817806, "learning_rate": 4.259264723972839e-06, "loss": 0.0978, "step": 6164 }, { "epoch": 1.68, "grad_norm": 1.901322185348973, "learning_rate": 4.2578073369646135e-06, "loss": 0.119, "step": 6165 }, { "epoch": 1.68, "grad_norm": 1.5771608730415214, "learning_rate": 4.256350014430221e-06, "loss": 0.0991, "step": 6166 }, { "epoch": 1.68, "grad_norm": 1.725720906861579, "learning_rate": 4.254892756496255e-06, "loss": 0.1064, "step": 6167 }, { "epoch": 1.68, "grad_norm": 1.8779194416022726, "learning_rate": 4.2534355632893085e-06, "loss": 0.1079, "step": 6168 }, { "epoch": 1.68, "grad_norm": 1.7127076584945626, "learning_rate": 4.251978434935964e-06, "loss": 0.1056, "step": 6169 }, { "epoch": 1.68, "grad_norm": 1.7260917632296027, "learning_rate": 4.250521371562803e-06, "loss": 0.1038, "step": 6170 }, { "epoch": 1.68, "grad_norm": 1.6001759860508347, "learning_rate": 4.249064373296403e-06, "loss": 0.0946, "step": 6171 }, { "epoch": 1.68, "grad_norm": 1.872591333169689, "learning_rate": 4.247607440263329e-06, "loss": 0.1096, "step": 6172 }, { "epoch": 1.69, "grad_norm": 1.7913107046741024, "learning_rate": 4.246150572590145e-06, "loss": 0.1143, "step": 6173 }, { "epoch": 1.69, "grad_norm": 1.6940806817026999, "learning_rate": 4.2446937704034065e-06, "loss": 0.0934, "step": 6174 }, { "epoch": 1.69, "grad_norm": 1.6213743474547668, "learning_rate": 4.243237033829668e-06, "loss": 0.1053, "step": 6175 }, { "epoch": 1.69, "grad_norm": 1.9097946400190038, "learning_rate": 4.241780362995471e-06, "loss": 0.1106, "step": 6176 }, { "epoch": 1.69, "grad_norm": 1.8433203070746311, "learning_rate": 4.240323758027361e-06, "loss": 0.1119, "step": 6177 }, { "epoch": 1.69, "grad_norm": 1.9083970681942128, "learning_rate": 4.238867219051868e-06, "loss": 0.1081, "step": 6178 }, { "epoch": 1.69, "grad_norm": 1.896381519393454, "learning_rate": 4.237410746195524e-06, "loss": 0.1149, "step": 6179 }, { "epoch": 1.69, "grad_norm": 1.5956697135336217, "learning_rate": 4.235954339584849e-06, "loss": 0.1079, "step": 6180 }, { "epoch": 1.69, "grad_norm": 1.7288693670369817, "learning_rate": 4.234497999346363e-06, "loss": 0.1125, "step": 6181 }, { "epoch": 1.69, "grad_norm": 1.74024813877436, "learning_rate": 4.233041725606573e-06, "loss": 0.1063, "step": 6182 }, { "epoch": 1.69, "grad_norm": 1.6642118478211958, "learning_rate": 4.231585518491989e-06, "loss": 0.1166, "step": 6183 }, { "epoch": 1.69, "grad_norm": 1.7322645731058426, "learning_rate": 4.23012937812911e-06, "loss": 0.1052, "step": 6184 }, { "epoch": 1.69, "grad_norm": 1.8102222726026993, "learning_rate": 4.22867330464443e-06, "loss": 0.1106, "step": 6185 }, { "epoch": 1.69, "grad_norm": 1.9057024966265923, "learning_rate": 4.227217298164434e-06, "loss": 0.1047, "step": 6186 }, { "epoch": 1.69, "grad_norm": 2.0122740418217004, "learning_rate": 4.22576135881561e-06, "loss": 0.1277, "step": 6187 }, { "epoch": 1.69, "grad_norm": 2.0036301671695225, "learning_rate": 4.2243054867244285e-06, "loss": 0.1255, "step": 6188 }, { "epoch": 1.69, "grad_norm": 1.9717285805002271, "learning_rate": 4.222849682017366e-06, "loss": 0.1212, "step": 6189 }, { "epoch": 1.69, "grad_norm": 1.5790668876520453, "learning_rate": 4.221393944820883e-06, "loss": 0.1045, "step": 6190 }, { "epoch": 1.69, "grad_norm": 1.7476060625551413, "learning_rate": 4.219938275261442e-06, "loss": 0.1086, "step": 6191 }, { "epoch": 1.69, "grad_norm": 1.5785725736088405, "learning_rate": 4.2184826734654925e-06, "loss": 0.1033, "step": 6192 }, { "epoch": 1.69, "grad_norm": 1.8288703972164801, "learning_rate": 4.2170271395594855e-06, "loss": 0.117, "step": 6193 }, { "epoch": 1.69, "grad_norm": 1.8825803403152777, "learning_rate": 4.215571673669857e-06, "loss": 0.1199, "step": 6194 }, { "epoch": 1.69, "grad_norm": 1.6492432007552422, "learning_rate": 4.214116275923051e-06, "loss": 0.0913, "step": 6195 }, { "epoch": 1.69, "grad_norm": 1.7909989098790684, "learning_rate": 4.2126609464454876e-06, "loss": 0.1209, "step": 6196 }, { "epoch": 1.69, "grad_norm": 1.8332592624498136, "learning_rate": 4.211205685363597e-06, "loss": 0.1033, "step": 6197 }, { "epoch": 1.69, "grad_norm": 1.9176775788282023, "learning_rate": 4.209750492803794e-06, "loss": 0.1384, "step": 6198 }, { "epoch": 1.69, "grad_norm": 1.9712404700660504, "learning_rate": 4.208295368892491e-06, "loss": 0.1189, "step": 6199 }, { "epoch": 1.69, "grad_norm": 1.6637269645537947, "learning_rate": 4.206840313756092e-06, "loss": 0.0971, "step": 6200 }, { "epoch": 1.69, "grad_norm": 1.5477873849277628, "learning_rate": 4.205385327521002e-06, "loss": 0.1024, "step": 6201 }, { "epoch": 1.69, "grad_norm": 1.8596908728724282, "learning_rate": 4.203930410313608e-06, "loss": 0.0951, "step": 6202 }, { "epoch": 1.69, "grad_norm": 1.4778054525938429, "learning_rate": 4.202475562260302e-06, "loss": 0.0866, "step": 6203 }, { "epoch": 1.69, "grad_norm": 2.0126047816033177, "learning_rate": 4.201020783487465e-06, "loss": 0.1171, "step": 6204 }, { "epoch": 1.69, "grad_norm": 1.8389749463207241, "learning_rate": 4.199566074121473e-06, "loss": 0.1115, "step": 6205 }, { "epoch": 1.69, "grad_norm": 1.5828426566207194, "learning_rate": 4.198111434288693e-06, "loss": 0.1021, "step": 6206 }, { "epoch": 1.69, "grad_norm": 2.0356217362206266, "learning_rate": 4.196656864115494e-06, "loss": 0.1134, "step": 6207 }, { "epoch": 1.69, "grad_norm": 1.727423384079484, "learning_rate": 4.195202363728227e-06, "loss": 0.1132, "step": 6208 }, { "epoch": 1.7, "grad_norm": 1.887721590345589, "learning_rate": 4.19374793325325e-06, "loss": 0.1031, "step": 6209 }, { "epoch": 1.7, "grad_norm": 1.8682436645469198, "learning_rate": 4.1922935728169045e-06, "loss": 0.1304, "step": 6210 }, { "epoch": 1.7, "grad_norm": 1.6056136810925175, "learning_rate": 4.190839282545532e-06, "loss": 0.0898, "step": 6211 }, { "epoch": 1.7, "grad_norm": 1.834031052172413, "learning_rate": 4.1893850625654626e-06, "loss": 0.1218, "step": 6212 }, { "epoch": 1.7, "grad_norm": 1.7464525978972036, "learning_rate": 4.187930913003029e-06, "loss": 0.1162, "step": 6213 }, { "epoch": 1.7, "grad_norm": 1.8107501587966144, "learning_rate": 4.186476833984546e-06, "loss": 0.1112, "step": 6214 }, { "epoch": 1.7, "grad_norm": 1.6075800943630099, "learning_rate": 4.185022825636334e-06, "loss": 0.1191, "step": 6215 }, { "epoch": 1.7, "grad_norm": 1.545418033846747, "learning_rate": 4.183568888084698e-06, "loss": 0.0998, "step": 6216 }, { "epoch": 1.7, "grad_norm": 1.5586167232423198, "learning_rate": 4.182115021455944e-06, "loss": 0.1006, "step": 6217 }, { "epoch": 1.7, "grad_norm": 1.9304884502825523, "learning_rate": 4.180661225876363e-06, "loss": 0.1391, "step": 6218 }, { "epoch": 1.7, "grad_norm": 1.849257718600328, "learning_rate": 4.179207501472254e-06, "loss": 0.1306, "step": 6219 }, { "epoch": 1.7, "grad_norm": 1.6273398337223621, "learning_rate": 4.177753848369892e-06, "loss": 0.1085, "step": 6220 }, { "epoch": 1.7, "grad_norm": 1.6257542497069029, "learning_rate": 4.1763002666955615e-06, "loss": 0.11, "step": 6221 }, { "epoch": 1.7, "grad_norm": 2.0146443830332688, "learning_rate": 4.174846756575531e-06, "loss": 0.1186, "step": 6222 }, { "epoch": 1.7, "grad_norm": 1.399684554794835, "learning_rate": 4.1733933181360685e-06, "loss": 0.0891, "step": 6223 }, { "epoch": 1.7, "grad_norm": 1.8405455736401044, "learning_rate": 4.1719399515034285e-06, "loss": 0.1354, "step": 6224 }, { "epoch": 1.7, "grad_norm": 1.6416844235507686, "learning_rate": 4.1704866568038715e-06, "loss": 0.1185, "step": 6225 }, { "epoch": 1.7, "grad_norm": 1.609336411819547, "learning_rate": 4.169033434163637e-06, "loss": 0.1125, "step": 6226 }, { "epoch": 1.7, "grad_norm": 2.19935327321842, "learning_rate": 4.167580283708971e-06, "loss": 0.1125, "step": 6227 }, { "epoch": 1.7, "grad_norm": 1.7915923785389125, "learning_rate": 4.166127205566104e-06, "loss": 0.1146, "step": 6228 }, { "epoch": 1.7, "grad_norm": 1.5245011255218217, "learning_rate": 4.1646741998612676e-06, "loss": 0.0893, "step": 6229 }, { "epoch": 1.7, "grad_norm": 1.7861856473251987, "learning_rate": 4.1632212667206786e-06, "loss": 0.1136, "step": 6230 }, { "epoch": 1.7, "grad_norm": 1.9102802804857326, "learning_rate": 4.161768406270559e-06, "loss": 0.1039, "step": 6231 }, { "epoch": 1.7, "grad_norm": 1.744142733969087, "learning_rate": 4.1603156186371106e-06, "loss": 0.1081, "step": 6232 }, { "epoch": 1.7, "grad_norm": 1.9990226150833714, "learning_rate": 4.158862903946543e-06, "loss": 0.1283, "step": 6233 }, { "epoch": 1.7, "grad_norm": 1.9946853239127929, "learning_rate": 4.1574102623250476e-06, "loss": 0.1078, "step": 6234 }, { "epoch": 1.7, "grad_norm": 1.7170015494425657, "learning_rate": 4.155957693898817e-06, "loss": 0.1133, "step": 6235 }, { "epoch": 1.7, "grad_norm": 1.8090467061126, "learning_rate": 4.154505198794034e-06, "loss": 0.1224, "step": 6236 }, { "epoch": 1.7, "grad_norm": 1.830096191286136, "learning_rate": 4.153052777136879e-06, "loss": 0.1338, "step": 6237 }, { "epoch": 1.7, "grad_norm": 1.6866106988384477, "learning_rate": 4.151600429053517e-06, "loss": 0.1175, "step": 6238 }, { "epoch": 1.7, "grad_norm": 1.8175927489964359, "learning_rate": 4.1501481546701185e-06, "loss": 0.1116, "step": 6239 }, { "epoch": 1.7, "grad_norm": 1.4777329478896946, "learning_rate": 4.148695954112838e-06, "loss": 0.0923, "step": 6240 }, { "epoch": 1.7, "grad_norm": 1.5771567064194922, "learning_rate": 4.147243827507829e-06, "loss": 0.0854, "step": 6241 }, { "epoch": 1.7, "grad_norm": 1.9061231967446501, "learning_rate": 4.1457917749812345e-06, "loss": 0.1239, "step": 6242 }, { "epoch": 1.7, "grad_norm": 1.4517485857028007, "learning_rate": 4.1443397966591985e-06, "loss": 0.1015, "step": 6243 }, { "epoch": 1.7, "grad_norm": 2.314425253776543, "learning_rate": 4.142887892667848e-06, "loss": 0.1322, "step": 6244 }, { "epoch": 1.7, "grad_norm": 1.722177867766709, "learning_rate": 4.141436063133312e-06, "loss": 0.1049, "step": 6245 }, { "epoch": 1.71, "grad_norm": 1.6777799467020105, "learning_rate": 4.1399843081817085e-06, "loss": 0.0967, "step": 6246 }, { "epoch": 1.71, "grad_norm": 1.7876454286767574, "learning_rate": 4.138532627939153e-06, "loss": 0.115, "step": 6247 }, { "epoch": 1.71, "grad_norm": 1.7104228494378149, "learning_rate": 4.137081022531748e-06, "loss": 0.1169, "step": 6248 }, { "epoch": 1.71, "grad_norm": 1.8195999084262289, "learning_rate": 4.1356294920856e-06, "loss": 0.1316, "step": 6249 }, { "epoch": 1.71, "grad_norm": 2.012071523505236, "learning_rate": 4.134178036726795e-06, "loss": 0.1345, "step": 6250 }, { "epoch": 1.71, "grad_norm": 1.9317357464122598, "learning_rate": 4.132726656581426e-06, "loss": 0.1175, "step": 6251 }, { "epoch": 1.71, "grad_norm": 1.6234949767479492, "learning_rate": 4.13127535177557e-06, "loss": 0.0964, "step": 6252 }, { "epoch": 1.71, "grad_norm": 1.6201085088735945, "learning_rate": 4.129824122435304e-06, "loss": 0.1219, "step": 6253 }, { "epoch": 1.71, "grad_norm": 1.6107762805712103, "learning_rate": 4.128372968686691e-06, "loss": 0.0935, "step": 6254 }, { "epoch": 1.71, "grad_norm": 1.589530462542716, "learning_rate": 4.126921890655797e-06, "loss": 0.0922, "step": 6255 }, { "epoch": 1.71, "grad_norm": 1.7950944160366196, "learning_rate": 4.125470888468672e-06, "loss": 0.107, "step": 6256 }, { "epoch": 1.71, "grad_norm": 1.9591376933550466, "learning_rate": 4.124019962251366e-06, "loss": 0.1043, "step": 6257 }, { "epoch": 1.71, "grad_norm": 1.9207632201314828, "learning_rate": 4.12256911212992e-06, "loss": 0.1087, "step": 6258 }, { "epoch": 1.71, "grad_norm": 1.5569263988899922, "learning_rate": 4.121118338230369e-06, "loss": 0.0943, "step": 6259 }, { "epoch": 1.71, "grad_norm": 2.1941362611572037, "learning_rate": 4.119667640678737e-06, "loss": 0.1174, "step": 6260 }, { "epoch": 1.71, "grad_norm": 1.674340734627718, "learning_rate": 4.118217019601053e-06, "loss": 0.0842, "step": 6261 }, { "epoch": 1.71, "grad_norm": 1.3077173771590105, "learning_rate": 4.116766475123322e-06, "loss": 0.0736, "step": 6262 }, { "epoch": 1.71, "grad_norm": 1.7016590249287207, "learning_rate": 4.115316007371557e-06, "loss": 0.1004, "step": 6263 }, { "epoch": 1.71, "grad_norm": 1.9484416720576012, "learning_rate": 4.113865616471761e-06, "loss": 0.1383, "step": 6264 }, { "epoch": 1.71, "grad_norm": 1.4888373401562678, "learning_rate": 4.112415302549925e-06, "loss": 0.0862, "step": 6265 }, { "epoch": 1.71, "grad_norm": 1.6757488447946416, "learning_rate": 4.11096506573204e-06, "loss": 0.0991, "step": 6266 }, { "epoch": 1.71, "grad_norm": 1.797865324735706, "learning_rate": 4.109514906144084e-06, "loss": 0.1112, "step": 6267 }, { "epoch": 1.71, "grad_norm": 1.8179678288211754, "learning_rate": 4.108064823912035e-06, "loss": 0.116, "step": 6268 }, { "epoch": 1.71, "grad_norm": 1.6192344758158899, "learning_rate": 4.106614819161857e-06, "loss": 0.0995, "step": 6269 }, { "epoch": 1.71, "grad_norm": 1.9529892900646009, "learning_rate": 4.105164892019514e-06, "loss": 0.1287, "step": 6270 }, { "epoch": 1.71, "grad_norm": 1.7831377880203494, "learning_rate": 4.103715042610958e-06, "loss": 0.113, "step": 6271 }, { "epoch": 1.71, "grad_norm": 1.71200879740593, "learning_rate": 4.102265271062139e-06, "loss": 0.1111, "step": 6272 }, { "epoch": 1.71, "grad_norm": 1.7451634224136954, "learning_rate": 4.100815577498995e-06, "loss": 0.0938, "step": 6273 }, { "epoch": 1.71, "grad_norm": 1.658340972407541, "learning_rate": 4.099365962047464e-06, "loss": 0.1152, "step": 6274 }, { "epoch": 1.71, "grad_norm": 1.9082733883621454, "learning_rate": 4.097916424833469e-06, "loss": 0.1207, "step": 6275 }, { "epoch": 1.71, "grad_norm": 1.8794396958763797, "learning_rate": 4.0964669659829335e-06, "loss": 0.1144, "step": 6276 }, { "epoch": 1.71, "grad_norm": 1.8992218776703043, "learning_rate": 4.095017585621767e-06, "loss": 0.1137, "step": 6277 }, { "epoch": 1.71, "grad_norm": 1.4857048235580483, "learning_rate": 4.093568283875882e-06, "loss": 0.0982, "step": 6278 }, { "epoch": 1.71, "grad_norm": 1.822075966997169, "learning_rate": 4.0921190608711745e-06, "loss": 0.1305, "step": 6279 }, { "epoch": 1.71, "grad_norm": 1.7028811322326387, "learning_rate": 4.090669916733539e-06, "loss": 0.1175, "step": 6280 }, { "epoch": 1.71, "grad_norm": 1.5323279042293845, "learning_rate": 4.089220851588861e-06, "loss": 0.1033, "step": 6281 }, { "epoch": 1.71, "grad_norm": 1.8749770243241926, "learning_rate": 4.087771865563022e-06, "loss": 0.1146, "step": 6282 }, { "epoch": 1.72, "grad_norm": 1.6218832551442097, "learning_rate": 4.08632295878189e-06, "loss": 0.1001, "step": 6283 }, { "epoch": 1.72, "grad_norm": 1.8377943380263704, "learning_rate": 4.084874131371337e-06, "loss": 0.1201, "step": 6284 }, { "epoch": 1.72, "grad_norm": 1.693273814372446, "learning_rate": 4.083425383457215e-06, "loss": 0.1161, "step": 6285 }, { "epoch": 1.72, "grad_norm": 1.4607612580036378, "learning_rate": 4.081976715165382e-06, "loss": 0.0947, "step": 6286 }, { "epoch": 1.72, "grad_norm": 1.9474545859606078, "learning_rate": 4.080528126621679e-06, "loss": 0.1205, "step": 6287 }, { "epoch": 1.72, "grad_norm": 1.8513150835051702, "learning_rate": 4.079079617951946e-06, "loss": 0.1188, "step": 6288 }, { "epoch": 1.72, "grad_norm": 2.0285810892990783, "learning_rate": 4.077631189282011e-06, "loss": 0.1349, "step": 6289 }, { "epoch": 1.72, "grad_norm": 1.7125713816727879, "learning_rate": 4.0761828407377035e-06, "loss": 0.1132, "step": 6290 }, { "epoch": 1.72, "grad_norm": 1.8330073900229522, "learning_rate": 4.074734572444835e-06, "loss": 0.1004, "step": 6291 }, { "epoch": 1.72, "grad_norm": 2.107192627083278, "learning_rate": 4.0732863845292204e-06, "loss": 0.1347, "step": 6292 }, { "epoch": 1.72, "grad_norm": 2.009899782269485, "learning_rate": 4.071838277116659e-06, "loss": 0.1341, "step": 6293 }, { "epoch": 1.72, "grad_norm": 1.7518023673265388, "learning_rate": 4.070390250332951e-06, "loss": 0.1179, "step": 6294 }, { "epoch": 1.72, "grad_norm": 1.7840522966070804, "learning_rate": 4.06894230430388e-06, "loss": 0.1139, "step": 6295 }, { "epoch": 1.72, "grad_norm": 1.85980833486709, "learning_rate": 4.067494439155236e-06, "loss": 0.1225, "step": 6296 }, { "epoch": 1.72, "grad_norm": 1.6839369729734928, "learning_rate": 4.066046655012786e-06, "loss": 0.1116, "step": 6297 }, { "epoch": 1.72, "grad_norm": 1.8625577205152355, "learning_rate": 4.0645989520023035e-06, "loss": 0.1116, "step": 6298 }, { "epoch": 1.72, "grad_norm": 1.8290424019671168, "learning_rate": 4.0631513302495475e-06, "loss": 0.1022, "step": 6299 }, { "epoch": 1.72, "grad_norm": 1.6136230365369777, "learning_rate": 4.0617037898802744e-06, "loss": 0.1068, "step": 6300 }, { "epoch": 1.72, "grad_norm": 1.6510545205686094, "learning_rate": 4.060256331020226e-06, "loss": 0.0912, "step": 6301 }, { "epoch": 1.72, "grad_norm": 1.522876984073007, "learning_rate": 4.058808953795149e-06, "loss": 0.0995, "step": 6302 }, { "epoch": 1.72, "grad_norm": 1.8819209908448353, "learning_rate": 4.0573616583307705e-06, "loss": 0.1159, "step": 6303 }, { "epoch": 1.72, "grad_norm": 2.0839851262176987, "learning_rate": 4.05591444475282e-06, "loss": 0.1364, "step": 6304 }, { "epoch": 1.72, "grad_norm": 1.5986155566970244, "learning_rate": 4.054467313187013e-06, "loss": 0.1056, "step": 6305 }, { "epoch": 1.72, "grad_norm": 1.916139763030213, "learning_rate": 4.053020263759064e-06, "loss": 0.1223, "step": 6306 }, { "epoch": 1.72, "grad_norm": 1.7410239582425142, "learning_rate": 4.051573296594673e-06, "loss": 0.1071, "step": 6307 }, { "epoch": 1.72, "grad_norm": 1.7294536644988945, "learning_rate": 4.050126411819544e-06, "loss": 0.1157, "step": 6308 }, { "epoch": 1.72, "grad_norm": 1.714476912047543, "learning_rate": 4.048679609559359e-06, "loss": 0.0967, "step": 6309 }, { "epoch": 1.72, "grad_norm": 1.4838636893457047, "learning_rate": 4.047232889939807e-06, "loss": 0.0891, "step": 6310 }, { "epoch": 1.72, "grad_norm": 1.5880553438132763, "learning_rate": 4.04578625308656e-06, "loss": 0.0943, "step": 6311 }, { "epoch": 1.72, "grad_norm": 1.6927265223940133, "learning_rate": 4.044339699125289e-06, "loss": 0.1038, "step": 6312 }, { "epoch": 1.72, "grad_norm": 1.7261844509031412, "learning_rate": 4.0428932281816524e-06, "loss": 0.0913, "step": 6313 }, { "epoch": 1.72, "grad_norm": 1.7361019847026014, "learning_rate": 4.041446840381309e-06, "loss": 0.1098, "step": 6314 }, { "epoch": 1.72, "grad_norm": 2.012847330060325, "learning_rate": 4.0400005358499e-06, "loss": 0.1068, "step": 6315 }, { "epoch": 1.72, "grad_norm": 1.967308941807177, "learning_rate": 4.0385543147130694e-06, "loss": 0.1091, "step": 6316 }, { "epoch": 1.72, "grad_norm": 1.8784430748475012, "learning_rate": 4.037108177096447e-06, "loss": 0.1253, "step": 6317 }, { "epoch": 1.72, "grad_norm": 1.8152356342976743, "learning_rate": 4.03566212312566e-06, "loss": 0.1091, "step": 6318 }, { "epoch": 1.73, "grad_norm": 1.6640510985766093, "learning_rate": 4.034216152926322e-06, "loss": 0.1039, "step": 6319 }, { "epoch": 1.73, "grad_norm": 1.7465476414731584, "learning_rate": 4.032770266624051e-06, "loss": 0.1144, "step": 6320 }, { "epoch": 1.73, "grad_norm": 1.6961642023465495, "learning_rate": 4.031324464344441e-06, "loss": 0.1057, "step": 6321 }, { "epoch": 1.73, "grad_norm": 1.6043180054873025, "learning_rate": 4.029878746213096e-06, "loss": 0.104, "step": 6322 }, { "epoch": 1.73, "grad_norm": 1.8908366038279583, "learning_rate": 4.0284331123556e-06, "loss": 0.1204, "step": 6323 }, { "epoch": 1.73, "grad_norm": 1.9136045368627994, "learning_rate": 4.026987562897537e-06, "loss": 0.133, "step": 6324 }, { "epoch": 1.73, "grad_norm": 1.867368555275212, "learning_rate": 4.025542097964478e-06, "loss": 0.1194, "step": 6325 }, { "epoch": 1.73, "grad_norm": 1.7613660714842654, "learning_rate": 4.024096717681994e-06, "loss": 0.0983, "step": 6326 }, { "epoch": 1.73, "grad_norm": 1.6330437116195555, "learning_rate": 4.022651422175639e-06, "loss": 0.1103, "step": 6327 }, { "epoch": 1.73, "grad_norm": 1.6727600710904147, "learning_rate": 4.02120621157097e-06, "loss": 0.0972, "step": 6328 }, { "epoch": 1.73, "grad_norm": 1.7047219839487577, "learning_rate": 4.0197610859935275e-06, "loss": 0.1178, "step": 6329 }, { "epoch": 1.73, "grad_norm": 1.6410471793023342, "learning_rate": 4.018316045568853e-06, "loss": 0.1025, "step": 6330 }, { "epoch": 1.73, "grad_norm": 1.6415898324600682, "learning_rate": 4.016871090422471e-06, "loss": 0.1015, "step": 6331 }, { "epoch": 1.73, "grad_norm": 1.6361449685598493, "learning_rate": 4.015426220679909e-06, "loss": 0.0948, "step": 6332 }, { "epoch": 1.73, "grad_norm": 1.7076354447215008, "learning_rate": 4.013981436466677e-06, "loss": 0.1195, "step": 6333 }, { "epoch": 1.73, "grad_norm": 1.9945633506345075, "learning_rate": 4.012536737908288e-06, "loss": 0.1232, "step": 6334 }, { "epoch": 1.73, "grad_norm": 2.016750448680824, "learning_rate": 4.011092125130238e-06, "loss": 0.1337, "step": 6335 }, { "epoch": 1.73, "grad_norm": 1.6595075714977763, "learning_rate": 4.009647598258022e-06, "loss": 0.1015, "step": 6336 }, { "epoch": 1.73, "grad_norm": 1.5939061565182813, "learning_rate": 4.008203157417122e-06, "loss": 0.1097, "step": 6337 }, { "epoch": 1.73, "grad_norm": 1.5599216239504967, "learning_rate": 4.00675880273302e-06, "loss": 0.0992, "step": 6338 }, { "epoch": 1.73, "grad_norm": 2.116739625463112, "learning_rate": 4.005314534331181e-06, "loss": 0.1201, "step": 6339 }, { "epoch": 1.73, "grad_norm": 1.5403185486047273, "learning_rate": 4.003870352337075e-06, "loss": 0.0921, "step": 6340 }, { "epoch": 1.73, "grad_norm": 1.7762830646816974, "learning_rate": 4.00242625687615e-06, "loss": 0.1129, "step": 6341 }, { "epoch": 1.73, "grad_norm": 2.011306506880436, "learning_rate": 4.000982248073858e-06, "loss": 0.1199, "step": 6342 }, { "epoch": 1.73, "grad_norm": 1.578675225508738, "learning_rate": 3.999538326055636e-06, "loss": 0.0885, "step": 6343 }, { "epoch": 1.73, "grad_norm": 1.8467848082906557, "learning_rate": 3.998094490946922e-06, "loss": 0.1165, "step": 6344 }, { "epoch": 1.73, "grad_norm": 1.7278962800385307, "learning_rate": 3.996650742873135e-06, "loss": 0.1072, "step": 6345 }, { "epoch": 1.73, "grad_norm": 1.7610107380318833, "learning_rate": 3.995207081959696e-06, "loss": 0.1027, "step": 6346 }, { "epoch": 1.73, "grad_norm": 1.7415802279283394, "learning_rate": 3.993763508332014e-06, "loss": 0.0932, "step": 6347 }, { "epoch": 1.73, "grad_norm": 1.7691901781920214, "learning_rate": 3.992320022115492e-06, "loss": 0.1077, "step": 6348 }, { "epoch": 1.73, "grad_norm": 1.8045781793822298, "learning_rate": 3.990876623435522e-06, "loss": 0.1329, "step": 6349 }, { "epoch": 1.73, "grad_norm": 2.318751822292224, "learning_rate": 3.989433312417497e-06, "loss": 0.116, "step": 6350 }, { "epoch": 1.73, "grad_norm": 1.8800514274353077, "learning_rate": 3.987990089186789e-06, "loss": 0.1198, "step": 6351 }, { "epoch": 1.73, "grad_norm": 1.834117538687703, "learning_rate": 3.9865469538687765e-06, "loss": 0.1005, "step": 6352 }, { "epoch": 1.73, "grad_norm": 1.8260038407520658, "learning_rate": 3.985103906588821e-06, "loss": 0.105, "step": 6353 }, { "epoch": 1.73, "grad_norm": 1.4648568153538277, "learning_rate": 3.983660947472279e-06, "loss": 0.0859, "step": 6354 }, { "epoch": 1.73, "grad_norm": 1.6718674516106917, "learning_rate": 3.9822180766445e-06, "loss": 0.1002, "step": 6355 }, { "epoch": 1.74, "grad_norm": 1.8148230452477176, "learning_rate": 3.980775294230824e-06, "loss": 0.125, "step": 6356 }, { "epoch": 1.74, "grad_norm": 2.038399096135445, "learning_rate": 3.979332600356587e-06, "loss": 0.1073, "step": 6357 }, { "epoch": 1.74, "grad_norm": 1.6525805189891618, "learning_rate": 3.977889995147114e-06, "loss": 0.1006, "step": 6358 }, { "epoch": 1.74, "grad_norm": 1.6020102534148162, "learning_rate": 3.976447478727723e-06, "loss": 0.0945, "step": 6359 }, { "epoch": 1.74, "grad_norm": 1.8308861435571353, "learning_rate": 3.9750050512237224e-06, "loss": 0.1098, "step": 6360 }, { "epoch": 1.74, "grad_norm": 1.49953364426434, "learning_rate": 3.973562712760421e-06, "loss": 0.1067, "step": 6361 }, { "epoch": 1.74, "grad_norm": 1.8007984238567853, "learning_rate": 3.9721204634631075e-06, "loss": 0.1019, "step": 6362 }, { "epoch": 1.74, "grad_norm": 1.7925130265180567, "learning_rate": 3.970678303457073e-06, "loss": 0.1267, "step": 6363 }, { "epoch": 1.74, "grad_norm": 1.6890879945945154, "learning_rate": 3.969236232867594e-06, "loss": 0.1071, "step": 6364 }, { "epoch": 1.74, "grad_norm": 1.7418063945039548, "learning_rate": 3.9677942518199465e-06, "loss": 0.103, "step": 6365 }, { "epoch": 1.74, "grad_norm": 1.699227774074572, "learning_rate": 3.96635236043939e-06, "loss": 0.0994, "step": 6366 }, { "epoch": 1.74, "grad_norm": 1.9933384691082228, "learning_rate": 3.9649105588511854e-06, "loss": 0.1265, "step": 6367 }, { "epoch": 1.74, "grad_norm": 1.759316701935023, "learning_rate": 3.963468847180576e-06, "loss": 0.1003, "step": 6368 }, { "epoch": 1.74, "grad_norm": 1.6357790990251504, "learning_rate": 3.962027225552807e-06, "loss": 0.0954, "step": 6369 }, { "epoch": 1.74, "grad_norm": 1.9473634041898158, "learning_rate": 3.960585694093108e-06, "loss": 0.1313, "step": 6370 }, { "epoch": 1.74, "grad_norm": 1.643362854663145, "learning_rate": 3.9591442529267065e-06, "loss": 0.0983, "step": 6371 }, { "epoch": 1.74, "grad_norm": 1.7562258538083544, "learning_rate": 3.957702902178816e-06, "loss": 0.1054, "step": 6372 }, { "epoch": 1.74, "grad_norm": 1.870502096037075, "learning_rate": 3.956261641974653e-06, "loss": 0.1146, "step": 6373 }, { "epoch": 1.74, "grad_norm": 1.8749019154037525, "learning_rate": 3.954820472439409e-06, "loss": 0.1133, "step": 6374 }, { "epoch": 1.74, "grad_norm": 1.879597819384659, "learning_rate": 3.953379393698286e-06, "loss": 0.1193, "step": 6375 }, { "epoch": 1.74, "grad_norm": 1.7697407602776931, "learning_rate": 3.951938405876464e-06, "loss": 0.1071, "step": 6376 }, { "epoch": 1.74, "grad_norm": 1.6537547650649638, "learning_rate": 3.950497509099124e-06, "loss": 0.1029, "step": 6377 }, { "epoch": 1.74, "grad_norm": 1.5299361716564939, "learning_rate": 3.9490567034914335e-06, "loss": 0.091, "step": 6378 }, { "epoch": 1.74, "grad_norm": 2.027042229692232, "learning_rate": 3.947615989178558e-06, "loss": 0.1217, "step": 6379 }, { "epoch": 1.74, "grad_norm": 1.6522728425931497, "learning_rate": 3.946175366285647e-06, "loss": 0.0941, "step": 6380 }, { "epoch": 1.74, "grad_norm": 1.7413956911920896, "learning_rate": 3.9447348349378514e-06, "loss": 0.0967, "step": 6381 }, { "epoch": 1.74, "grad_norm": 1.7770048899054487, "learning_rate": 3.943294395260305e-06, "loss": 0.1025, "step": 6382 }, { "epoch": 1.74, "grad_norm": 1.3966216314330149, "learning_rate": 3.94185404737814e-06, "loss": 0.0819, "step": 6383 }, { "epoch": 1.74, "grad_norm": 1.9205587659461516, "learning_rate": 3.940413791416477e-06, "loss": 0.131, "step": 6384 }, { "epoch": 1.74, "grad_norm": 1.6689883052053822, "learning_rate": 3.938973627500434e-06, "loss": 0.1076, "step": 6385 }, { "epoch": 1.74, "grad_norm": 1.660496860500344, "learning_rate": 3.937533555755111e-06, "loss": 0.1046, "step": 6386 }, { "epoch": 1.74, "grad_norm": 1.733693328448356, "learning_rate": 3.936093576305613e-06, "loss": 0.1135, "step": 6387 }, { "epoch": 1.74, "grad_norm": 1.9321748449062366, "learning_rate": 3.9346536892770245e-06, "loss": 0.1186, "step": 6388 }, { "epoch": 1.74, "grad_norm": 1.618835787426827, "learning_rate": 3.933213894794432e-06, "loss": 0.1037, "step": 6389 }, { "epoch": 1.74, "grad_norm": 1.581015278960825, "learning_rate": 3.9317741929829036e-06, "loss": 0.1024, "step": 6390 }, { "epoch": 1.74, "grad_norm": 1.7747615736064235, "learning_rate": 3.930334583967514e-06, "loss": 0.1076, "step": 6391 }, { "epoch": 1.75, "grad_norm": 1.4866400982489236, "learning_rate": 3.928895067873313e-06, "loss": 0.0903, "step": 6392 }, { "epoch": 1.75, "grad_norm": 1.8653124645872947, "learning_rate": 3.927455644825356e-06, "loss": 0.1255, "step": 6393 }, { "epoch": 1.75, "grad_norm": 1.9008728108769946, "learning_rate": 3.926016314948682e-06, "loss": 0.1218, "step": 6394 }, { "epoch": 1.75, "grad_norm": 1.7312758460558038, "learning_rate": 3.924577078368326e-06, "loss": 0.1055, "step": 6395 }, { "epoch": 1.75, "grad_norm": 1.7965784299992846, "learning_rate": 3.923137935209311e-06, "loss": 0.0999, "step": 6396 }, { "epoch": 1.75, "grad_norm": 1.7056743541019883, "learning_rate": 3.9216988855966595e-06, "loss": 0.103, "step": 6397 }, { "epoch": 1.75, "grad_norm": 1.6843522577964418, "learning_rate": 3.920259929655376e-06, "loss": 0.1074, "step": 6398 }, { "epoch": 1.75, "grad_norm": 1.6599475380326825, "learning_rate": 3.918821067510464e-06, "loss": 0.1116, "step": 6399 }, { "epoch": 1.75, "grad_norm": 1.8124204355444313, "learning_rate": 3.9173822992869166e-06, "loss": 0.1197, "step": 6400 }, { "epoch": 1.75, "grad_norm": 1.672096977376338, "learning_rate": 3.915943625109719e-06, "loss": 0.1004, "step": 6401 }, { "epoch": 1.75, "grad_norm": 1.812939294905621, "learning_rate": 3.914505045103845e-06, "loss": 0.114, "step": 6402 }, { "epoch": 1.75, "grad_norm": 1.5263461052725322, "learning_rate": 3.9130665593942695e-06, "loss": 0.0801, "step": 6403 }, { "epoch": 1.75, "grad_norm": 1.9436320032795122, "learning_rate": 3.911628168105946e-06, "loss": 0.1096, "step": 6404 }, { "epoch": 1.75, "grad_norm": 2.045023265162794, "learning_rate": 3.91018987136383e-06, "loss": 0.1174, "step": 6405 }, { "epoch": 1.75, "grad_norm": 1.7693608262918126, "learning_rate": 3.908751669292865e-06, "loss": 0.1186, "step": 6406 }, { "epoch": 1.75, "grad_norm": 1.8094180156407509, "learning_rate": 3.907313562017988e-06, "loss": 0.0993, "step": 6407 }, { "epoch": 1.75, "grad_norm": 1.9548927597012942, "learning_rate": 3.905875549664123e-06, "loss": 0.1085, "step": 6408 }, { "epoch": 1.75, "grad_norm": 2.3712759906977015, "learning_rate": 3.9044376323561955e-06, "loss": 0.1346, "step": 6409 }, { "epoch": 1.75, "grad_norm": 1.6796882524081158, "learning_rate": 3.902999810219109e-06, "loss": 0.1167, "step": 6410 }, { "epoch": 1.75, "grad_norm": 2.0736014706249133, "learning_rate": 3.901562083377772e-06, "loss": 0.14, "step": 6411 }, { "epoch": 1.75, "grad_norm": 1.6749096651001998, "learning_rate": 3.900124451957076e-06, "loss": 0.1022, "step": 6412 }, { "epoch": 1.75, "grad_norm": 1.8005797540982094, "learning_rate": 3.898686916081909e-06, "loss": 0.1088, "step": 6413 }, { "epoch": 1.75, "grad_norm": 1.6764234464253458, "learning_rate": 3.8972494758771455e-06, "loss": 0.108, "step": 6414 }, { "epoch": 1.75, "grad_norm": 1.577037439838749, "learning_rate": 3.895812131467661e-06, "loss": 0.103, "step": 6415 }, { "epoch": 1.75, "grad_norm": 1.7168569442585115, "learning_rate": 3.89437488297831e-06, "loss": 0.0908, "step": 6416 }, { "epoch": 1.75, "grad_norm": 1.779965992727559, "learning_rate": 3.892937730533951e-06, "loss": 0.1103, "step": 6417 }, { "epoch": 1.75, "grad_norm": 1.5831992631281655, "learning_rate": 3.891500674259425e-06, "loss": 0.0938, "step": 6418 }, { "epoch": 1.75, "grad_norm": 1.6234704476342032, "learning_rate": 3.89006371427957e-06, "loss": 0.1003, "step": 6419 }, { "epoch": 1.75, "grad_norm": 1.319157269093631, "learning_rate": 3.8886268507192116e-06, "loss": 0.0728, "step": 6420 }, { "epoch": 1.75, "grad_norm": 1.5664773761108455, "learning_rate": 3.887190083703174e-06, "loss": 0.0936, "step": 6421 }, { "epoch": 1.75, "grad_norm": 1.795138637449241, "learning_rate": 3.8857534133562625e-06, "loss": 0.1329, "step": 6422 }, { "epoch": 1.75, "grad_norm": 2.0248952350527865, "learning_rate": 3.884316839803284e-06, "loss": 0.1221, "step": 6423 }, { "epoch": 1.75, "grad_norm": 1.7496588609967472, "learning_rate": 3.88288036316903e-06, "loss": 0.1066, "step": 6424 }, { "epoch": 1.75, "grad_norm": 1.753005107062362, "learning_rate": 3.8814439835782895e-06, "loss": 0.112, "step": 6425 }, { "epoch": 1.75, "grad_norm": 1.8922359529202695, "learning_rate": 3.8800077011558354e-06, "loss": 0.1146, "step": 6426 }, { "epoch": 1.75, "grad_norm": 1.7295976390726013, "learning_rate": 3.8785715160264435e-06, "loss": 0.0882, "step": 6427 }, { "epoch": 1.75, "grad_norm": 1.7230143363248638, "learning_rate": 3.877135428314867e-06, "loss": 0.1074, "step": 6428 }, { "epoch": 1.76, "grad_norm": 1.7207681416753564, "learning_rate": 3.875699438145862e-06, "loss": 0.1078, "step": 6429 }, { "epoch": 1.76, "grad_norm": 1.6256700685162027, "learning_rate": 3.874263545644172e-06, "loss": 0.0961, "step": 6430 }, { "epoch": 1.76, "grad_norm": 1.8171232578098409, "learning_rate": 3.872827750934531e-06, "loss": 0.1172, "step": 6431 }, { "epoch": 1.76, "grad_norm": 1.464700928514557, "learning_rate": 3.871392054141665e-06, "loss": 0.09, "step": 6432 }, { "epoch": 1.76, "grad_norm": 1.8859538243111091, "learning_rate": 3.869956455390295e-06, "loss": 0.1136, "step": 6433 }, { "epoch": 1.76, "grad_norm": 1.6665449169631579, "learning_rate": 3.868520954805126e-06, "loss": 0.1124, "step": 6434 }, { "epoch": 1.76, "grad_norm": 1.654570204763802, "learning_rate": 3.867085552510865e-06, "loss": 0.0909, "step": 6435 }, { "epoch": 1.76, "grad_norm": 1.7954724488798945, "learning_rate": 3.865650248632199e-06, "loss": 0.1161, "step": 6436 }, { "epoch": 1.76, "grad_norm": 1.6960803927604093, "learning_rate": 3.864215043293817e-06, "loss": 0.1084, "step": 6437 }, { "epoch": 1.76, "grad_norm": 1.8189493126448733, "learning_rate": 3.86277993662039e-06, "loss": 0.119, "step": 6438 }, { "epoch": 1.76, "grad_norm": 1.925060860223729, "learning_rate": 3.861344928736588e-06, "loss": 0.1141, "step": 6439 }, { "epoch": 1.76, "grad_norm": 1.6462982336027874, "learning_rate": 3.859910019767065e-06, "loss": 0.0889, "step": 6440 }, { "epoch": 1.76, "grad_norm": 1.6880888071064972, "learning_rate": 3.858475209836476e-06, "loss": 0.0965, "step": 6441 }, { "epoch": 1.76, "grad_norm": 1.7741557268217825, "learning_rate": 3.8570404990694585e-06, "loss": 0.1078, "step": 6442 }, { "epoch": 1.76, "grad_norm": 1.6136132586681615, "learning_rate": 3.855605887590648e-06, "loss": 0.0867, "step": 6443 }, { "epoch": 1.76, "grad_norm": 1.9227031191709991, "learning_rate": 3.854171375524664e-06, "loss": 0.1253, "step": 6444 }, { "epoch": 1.76, "grad_norm": 1.608952264758711, "learning_rate": 3.8527369629961264e-06, "loss": 0.0998, "step": 6445 }, { "epoch": 1.76, "grad_norm": 1.9284906945854983, "learning_rate": 3.851302650129637e-06, "loss": 0.1144, "step": 6446 }, { "epoch": 1.76, "grad_norm": 1.720398041903394, "learning_rate": 3.849868437049799e-06, "loss": 0.0982, "step": 6447 }, { "epoch": 1.76, "grad_norm": 1.8307191966404819, "learning_rate": 3.8484343238811976e-06, "loss": 0.1037, "step": 6448 }, { "epoch": 1.76, "grad_norm": 1.7194028803150143, "learning_rate": 3.847000310748412e-06, "loss": 0.1026, "step": 6449 }, { "epoch": 1.76, "grad_norm": 1.5147489211052783, "learning_rate": 3.845566397776022e-06, "loss": 0.0872, "step": 6450 }, { "epoch": 1.76, "grad_norm": 1.7126656857846014, "learning_rate": 3.844132585088581e-06, "loss": 0.106, "step": 6451 }, { "epoch": 1.76, "grad_norm": 1.8425324651866652, "learning_rate": 3.84269887281065e-06, "loss": 0.1265, "step": 6452 }, { "epoch": 1.76, "grad_norm": 1.860898585348654, "learning_rate": 3.8412652610667725e-06, "loss": 0.1082, "step": 6453 }, { "epoch": 1.76, "grad_norm": 2.3912992664421173, "learning_rate": 3.839831749981486e-06, "loss": 0.1411, "step": 6454 }, { "epoch": 1.76, "grad_norm": 1.7318135671864818, "learning_rate": 3.838398339679316e-06, "loss": 0.1221, "step": 6455 }, { "epoch": 1.76, "grad_norm": 1.7509032358501055, "learning_rate": 3.836965030284788e-06, "loss": 0.1041, "step": 6456 }, { "epoch": 1.76, "grad_norm": 1.5399513053699183, "learning_rate": 3.835531821922405e-06, "loss": 0.0866, "step": 6457 }, { "epoch": 1.76, "grad_norm": 1.660993941393976, "learning_rate": 3.834098714716676e-06, "loss": 0.0935, "step": 6458 }, { "epoch": 1.76, "grad_norm": 1.8758154456516754, "learning_rate": 3.83266570879209e-06, "loss": 0.1264, "step": 6459 }, { "epoch": 1.76, "grad_norm": 1.5212660116844818, "learning_rate": 3.831232804273133e-06, "loss": 0.099, "step": 6460 }, { "epoch": 1.76, "grad_norm": 1.7630505757765147, "learning_rate": 3.829800001284278e-06, "loss": 0.1005, "step": 6461 }, { "epoch": 1.76, "grad_norm": 1.9531576469215322, "learning_rate": 3.828367299949998e-06, "loss": 0.0996, "step": 6462 }, { "epoch": 1.76, "grad_norm": 1.7749718001740593, "learning_rate": 3.826934700394743e-06, "loss": 0.1134, "step": 6463 }, { "epoch": 1.76, "grad_norm": 1.6729206379580095, "learning_rate": 3.8255022027429675e-06, "loss": 0.1092, "step": 6464 }, { "epoch": 1.76, "grad_norm": 1.5505312976891268, "learning_rate": 3.8240698071191096e-06, "loss": 0.0948, "step": 6465 }, { "epoch": 1.77, "grad_norm": 1.844643445515912, "learning_rate": 3.822637513647601e-06, "loss": 0.1244, "step": 6466 }, { "epoch": 1.77, "grad_norm": 1.490980276687843, "learning_rate": 3.821205322452863e-06, "loss": 0.0956, "step": 6467 }, { "epoch": 1.77, "grad_norm": 1.7459080697077198, "learning_rate": 3.819773233659314e-06, "loss": 0.0938, "step": 6468 }, { "epoch": 1.77, "grad_norm": 1.8999375104193943, "learning_rate": 3.818341247391351e-06, "loss": 0.1289, "step": 6469 }, { "epoch": 1.77, "grad_norm": 1.5190056298261871, "learning_rate": 3.816909363773377e-06, "loss": 0.0855, "step": 6470 }, { "epoch": 1.77, "grad_norm": 1.7734254151293125, "learning_rate": 3.815477582929773e-06, "loss": 0.1021, "step": 6471 }, { "epoch": 1.77, "grad_norm": 1.4988269917553074, "learning_rate": 3.814045904984922e-06, "loss": 0.0935, "step": 6472 }, { "epoch": 1.77, "grad_norm": 1.6231220216540578, "learning_rate": 3.812614330063189e-06, "loss": 0.0937, "step": 6473 }, { "epoch": 1.77, "grad_norm": 1.6387112535914998, "learning_rate": 3.811182858288938e-06, "loss": 0.096, "step": 6474 }, { "epoch": 1.77, "grad_norm": 1.926236321673452, "learning_rate": 3.809751489786515e-06, "loss": 0.1167, "step": 6475 }, { "epoch": 1.77, "grad_norm": 1.7127438060327105, "learning_rate": 3.8083202246802675e-06, "loss": 0.1005, "step": 6476 }, { "epoch": 1.77, "grad_norm": 1.4265618357966356, "learning_rate": 3.8068890630945244e-06, "loss": 0.0672, "step": 6477 }, { "epoch": 1.77, "grad_norm": 1.925076064315851, "learning_rate": 3.8054580051536127e-06, "loss": 0.1018, "step": 6478 }, { "epoch": 1.77, "grad_norm": 1.789362869258968, "learning_rate": 3.8040270509818446e-06, "loss": 0.1226, "step": 6479 }, { "epoch": 1.77, "grad_norm": 1.6415253230207796, "learning_rate": 3.802596200703531e-06, "loss": 0.091, "step": 6480 }, { "epoch": 1.77, "grad_norm": 2.0605143728519746, "learning_rate": 3.8011654544429626e-06, "loss": 0.1268, "step": 6481 }, { "epoch": 1.77, "grad_norm": 1.7230097741804673, "learning_rate": 3.799734812324434e-06, "loss": 0.0952, "step": 6482 }, { "epoch": 1.77, "grad_norm": 1.6546040043062766, "learning_rate": 3.798304274472219e-06, "loss": 0.1003, "step": 6483 }, { "epoch": 1.77, "grad_norm": 1.7718198969168648, "learning_rate": 3.796873841010591e-06, "loss": 0.1297, "step": 6484 }, { "epoch": 1.77, "grad_norm": 1.7966167868562755, "learning_rate": 3.795443512063808e-06, "loss": 0.1041, "step": 6485 }, { "epoch": 1.77, "grad_norm": 1.5890620885769258, "learning_rate": 3.794013287756125e-06, "loss": 0.0979, "step": 6486 }, { "epoch": 1.77, "grad_norm": 1.886271351498799, "learning_rate": 3.792583168211782e-06, "loss": 0.1072, "step": 6487 }, { "epoch": 1.77, "grad_norm": 1.616674247202884, "learning_rate": 3.7911531535550145e-06, "loss": 0.0931, "step": 6488 }, { "epoch": 1.77, "grad_norm": 1.9051382378988642, "learning_rate": 3.7897232439100455e-06, "loss": 0.1182, "step": 6489 }, { "epoch": 1.77, "grad_norm": 1.5975629571359193, "learning_rate": 3.788293439401093e-06, "loss": 0.0872, "step": 6490 }, { "epoch": 1.77, "grad_norm": 1.8772544938166351, "learning_rate": 3.7868637401523582e-06, "loss": 0.1236, "step": 6491 }, { "epoch": 1.77, "grad_norm": 1.7772421325054895, "learning_rate": 3.785434146288045e-06, "loss": 0.098, "step": 6492 }, { "epoch": 1.77, "grad_norm": 1.6555157819435316, "learning_rate": 3.7840046579323346e-06, "loss": 0.0962, "step": 6493 }, { "epoch": 1.77, "grad_norm": 1.5272594060672573, "learning_rate": 3.7825752752094113e-06, "loss": 0.0925, "step": 6494 }, { "epoch": 1.77, "grad_norm": 1.5116185690533241, "learning_rate": 3.7811459982434414e-06, "loss": 0.0943, "step": 6495 }, { "epoch": 1.77, "grad_norm": 1.6697508126442953, "learning_rate": 3.779716827158587e-06, "loss": 0.0974, "step": 6496 }, { "epoch": 1.77, "grad_norm": 1.6515327339448527, "learning_rate": 3.7782877620789966e-06, "loss": 0.0985, "step": 6497 }, { "epoch": 1.77, "grad_norm": 1.7202019819050054, "learning_rate": 3.776858803128818e-06, "loss": 0.1089, "step": 6498 }, { "epoch": 1.77, "grad_norm": 2.036772158605855, "learning_rate": 3.775429950432176e-06, "loss": 0.1081, "step": 6499 }, { "epoch": 1.77, "grad_norm": 1.738125713457519, "learning_rate": 3.7740012041132016e-06, "loss": 0.1037, "step": 6500 }, { "epoch": 1.77, "grad_norm": 1.8471002559587975, "learning_rate": 3.7725725642960047e-06, "loss": 0.1102, "step": 6501 }, { "epoch": 1.78, "grad_norm": 1.6684390044862578, "learning_rate": 3.7711440311046928e-06, "loss": 0.1105, "step": 6502 }, { "epoch": 1.78, "grad_norm": 1.7633960570193847, "learning_rate": 3.769715604663358e-06, "loss": 0.104, "step": 6503 }, { "epoch": 1.78, "grad_norm": 1.7255808746356345, "learning_rate": 3.7682872850960933e-06, "loss": 0.1038, "step": 6504 }, { "epoch": 1.78, "grad_norm": 1.7367369773595072, "learning_rate": 3.766859072526969e-06, "loss": 0.0968, "step": 6505 }, { "epoch": 1.78, "grad_norm": 1.923610787255813, "learning_rate": 3.7654309670800575e-06, "loss": 0.1173, "step": 6506 }, { "epoch": 1.78, "grad_norm": 2.014371567413567, "learning_rate": 3.7640029688794155e-06, "loss": 0.1176, "step": 6507 }, { "epoch": 1.78, "grad_norm": 1.6845506914753037, "learning_rate": 3.7625750780490942e-06, "loss": 0.1261, "step": 6508 }, { "epoch": 1.78, "grad_norm": 1.4925341793173412, "learning_rate": 3.761147294713131e-06, "loss": 0.0762, "step": 6509 }, { "epoch": 1.78, "grad_norm": 1.771630727913245, "learning_rate": 3.7597196189955597e-06, "loss": 0.1035, "step": 6510 }, { "epoch": 1.78, "grad_norm": 1.8120225083224282, "learning_rate": 3.7582920510203976e-06, "loss": 0.1178, "step": 6511 }, { "epoch": 1.78, "grad_norm": 1.5605328304971582, "learning_rate": 3.7568645909116608e-06, "loss": 0.1069, "step": 6512 }, { "epoch": 1.78, "grad_norm": 1.960781930147555, "learning_rate": 3.755437238793349e-06, "loss": 0.1126, "step": 6513 }, { "epoch": 1.78, "grad_norm": 1.548207826420507, "learning_rate": 3.7540099947894576e-06, "loss": 0.1023, "step": 6514 }, { "epoch": 1.78, "grad_norm": 1.5802087678438415, "learning_rate": 3.752582859023968e-06, "loss": 0.0942, "step": 6515 }, { "epoch": 1.78, "grad_norm": 1.6094298276627206, "learning_rate": 3.751155831620858e-06, "loss": 0.1077, "step": 6516 }, { "epoch": 1.78, "grad_norm": 1.5037657871299246, "learning_rate": 3.7497289127040882e-06, "loss": 0.091, "step": 6517 }, { "epoch": 1.78, "grad_norm": 1.8227546607597456, "learning_rate": 3.748302102397618e-06, "loss": 0.1205, "step": 6518 }, { "epoch": 1.78, "grad_norm": 1.9321856952072975, "learning_rate": 3.7468754008253915e-06, "loss": 0.1338, "step": 6519 }, { "epoch": 1.78, "grad_norm": 1.5676908198961796, "learning_rate": 3.7454488081113473e-06, "loss": 0.0866, "step": 6520 }, { "epoch": 1.78, "grad_norm": 1.5494013831523572, "learning_rate": 3.7440223243794095e-06, "loss": 0.1098, "step": 6521 }, { "epoch": 1.78, "grad_norm": 1.5999213286153722, "learning_rate": 3.7425959497534997e-06, "loss": 0.1094, "step": 6522 }, { "epoch": 1.78, "grad_norm": 1.6183921382450126, "learning_rate": 3.741169684357522e-06, "loss": 0.106, "step": 6523 }, { "epoch": 1.78, "grad_norm": 1.6362738729307476, "learning_rate": 3.7397435283153795e-06, "loss": 0.1035, "step": 6524 }, { "epoch": 1.78, "grad_norm": 1.751160447794452, "learning_rate": 3.7383174817509583e-06, "loss": 0.0996, "step": 6525 }, { "epoch": 1.78, "grad_norm": 2.067889380664673, "learning_rate": 3.7368915447881404e-06, "loss": 0.1359, "step": 6526 }, { "epoch": 1.78, "grad_norm": 1.9671021991120594, "learning_rate": 3.7354657175507947e-06, "loss": 0.1298, "step": 6527 }, { "epoch": 1.78, "grad_norm": 1.9459488418555648, "learning_rate": 3.7340400001627832e-06, "loss": 0.1276, "step": 6528 }, { "epoch": 1.78, "grad_norm": 1.8097339360372815, "learning_rate": 3.732614392747954e-06, "loss": 0.1138, "step": 6529 }, { "epoch": 1.78, "grad_norm": 1.9262320793680647, "learning_rate": 3.7311888954301534e-06, "loss": 0.1298, "step": 6530 }, { "epoch": 1.78, "grad_norm": 1.602407186642757, "learning_rate": 3.7297635083332097e-06, "loss": 0.1134, "step": 6531 }, { "epoch": 1.78, "grad_norm": 1.4676414332743384, "learning_rate": 3.728338231580948e-06, "loss": 0.0989, "step": 6532 }, { "epoch": 1.78, "grad_norm": 1.851882378144772, "learning_rate": 3.7269130652971787e-06, "loss": 0.1036, "step": 6533 }, { "epoch": 1.78, "grad_norm": 1.556133849475282, "learning_rate": 3.725488009605708e-06, "loss": 0.0962, "step": 6534 }, { "epoch": 1.78, "grad_norm": 1.894602342366671, "learning_rate": 3.7240630646303262e-06, "loss": 0.1247, "step": 6535 }, { "epoch": 1.78, "grad_norm": 1.6930784951732443, "learning_rate": 3.7226382304948215e-06, "loss": 0.0863, "step": 6536 }, { "epoch": 1.78, "grad_norm": 1.7036900265105728, "learning_rate": 3.721213507322965e-06, "loss": 0.116, "step": 6537 }, { "epoch": 1.78, "grad_norm": 1.7029355240497255, "learning_rate": 3.7197888952385236e-06, "loss": 0.1019, "step": 6538 }, { "epoch": 1.79, "grad_norm": 1.6303880208432018, "learning_rate": 3.7183643943652513e-06, "loss": 0.0951, "step": 6539 }, { "epoch": 1.79, "grad_norm": 1.7987873872603957, "learning_rate": 3.7169400048268945e-06, "loss": 0.1199, "step": 6540 }, { "epoch": 1.79, "grad_norm": 1.7566828248635518, "learning_rate": 3.7155157267471863e-06, "loss": 0.1067, "step": 6541 }, { "epoch": 1.79, "grad_norm": 1.9579602180485876, "learning_rate": 3.7140915602498574e-06, "loss": 0.1112, "step": 6542 }, { "epoch": 1.79, "grad_norm": 1.733989837374824, "learning_rate": 3.712667505458622e-06, "loss": 0.1213, "step": 6543 }, { "epoch": 1.79, "grad_norm": 1.6021641639621227, "learning_rate": 3.7112435624971855e-06, "loss": 0.091, "step": 6544 }, { "epoch": 1.79, "grad_norm": 1.5399654835767467, "learning_rate": 3.7098197314892493e-06, "loss": 0.0853, "step": 6545 }, { "epoch": 1.79, "grad_norm": 1.5430774342072628, "learning_rate": 3.7083960125584944e-06, "loss": 0.0906, "step": 6546 }, { "epoch": 1.79, "grad_norm": 1.8571089976689217, "learning_rate": 3.7069724058286045e-06, "loss": 0.1092, "step": 6547 }, { "epoch": 1.79, "grad_norm": 1.498037968860935, "learning_rate": 3.7055489114232433e-06, "loss": 0.0996, "step": 6548 }, { "epoch": 1.79, "grad_norm": 1.6837886361606174, "learning_rate": 3.7041255294660723e-06, "loss": 0.1008, "step": 6549 }, { "epoch": 1.79, "grad_norm": 1.6798957014764648, "learning_rate": 3.702702260080735e-06, "loss": 0.0995, "step": 6550 }, { "epoch": 1.79, "grad_norm": 1.9628604863377934, "learning_rate": 3.7012791033908766e-06, "loss": 0.1125, "step": 6551 }, { "epoch": 1.79, "grad_norm": 1.4901768766104952, "learning_rate": 3.6998560595201188e-06, "loss": 0.0963, "step": 6552 }, { "epoch": 1.79, "grad_norm": 1.8730088759216363, "learning_rate": 3.698433128592086e-06, "loss": 0.1094, "step": 6553 }, { "epoch": 1.79, "grad_norm": 1.7366970038812257, "learning_rate": 3.6970103107303845e-06, "loss": 0.1126, "step": 6554 }, { "epoch": 1.79, "grad_norm": 1.6866143118892862, "learning_rate": 3.695587606058616e-06, "loss": 0.1044, "step": 6555 }, { "epoch": 1.79, "grad_norm": 1.6264082540919602, "learning_rate": 3.6941650147003655e-06, "loss": 0.0993, "step": 6556 }, { "epoch": 1.79, "grad_norm": 1.832193638718385, "learning_rate": 3.692742536779219e-06, "loss": 0.1214, "step": 6557 }, { "epoch": 1.79, "grad_norm": 1.7615475855151723, "learning_rate": 3.6913201724187397e-06, "loss": 0.0921, "step": 6558 }, { "epoch": 1.79, "grad_norm": 1.6281581195082457, "learning_rate": 3.6898979217424934e-06, "loss": 0.0949, "step": 6559 }, { "epoch": 1.79, "grad_norm": 1.5124977547077132, "learning_rate": 3.688475784874026e-06, "loss": 0.09, "step": 6560 }, { "epoch": 1.79, "grad_norm": 1.6343000926567257, "learning_rate": 3.68705376193688e-06, "loss": 0.097, "step": 6561 }, { "epoch": 1.79, "grad_norm": 1.643227847149309, "learning_rate": 3.685631853054583e-06, "loss": 0.0934, "step": 6562 }, { "epoch": 1.79, "grad_norm": 1.8801033198210357, "learning_rate": 3.6842100583506607e-06, "loss": 0.1252, "step": 6563 }, { "epoch": 1.79, "grad_norm": 1.8324821365752662, "learning_rate": 3.682788377948617e-06, "loss": 0.1105, "step": 6564 }, { "epoch": 1.79, "grad_norm": 1.756068432387505, "learning_rate": 3.681366811971957e-06, "loss": 0.1207, "step": 6565 }, { "epoch": 1.79, "grad_norm": 1.8340457712239377, "learning_rate": 3.6799453605441695e-06, "loss": 0.1221, "step": 6566 }, { "epoch": 1.79, "grad_norm": 7.190789473925438, "learning_rate": 3.6785240237887355e-06, "loss": 0.0958, "step": 6567 }, { "epoch": 1.79, "grad_norm": 1.8477917898717289, "learning_rate": 3.6771028018291244e-06, "loss": 0.1053, "step": 6568 }, { "epoch": 1.79, "grad_norm": 1.8543499557881225, "learning_rate": 3.675681694788801e-06, "loss": 0.1084, "step": 6569 }, { "epoch": 1.79, "grad_norm": 1.7149131192807923, "learning_rate": 3.6742607027912093e-06, "loss": 0.1064, "step": 6570 }, { "epoch": 1.79, "grad_norm": 1.7968793843237767, "learning_rate": 3.6728398259597965e-06, "loss": 0.1198, "step": 6571 }, { "epoch": 1.79, "grad_norm": 1.5996142133101694, "learning_rate": 3.6714190644179894e-06, "loss": 0.0902, "step": 6572 }, { "epoch": 1.79, "grad_norm": 1.8663270939451262, "learning_rate": 3.6699984182892113e-06, "loss": 0.1189, "step": 6573 }, { "epoch": 1.79, "grad_norm": 1.763011937746098, "learning_rate": 3.668577887696869e-06, "loss": 0.1045, "step": 6574 }, { "epoch": 1.79, "grad_norm": 1.7683328939236824, "learning_rate": 3.6671574727643694e-06, "loss": 0.1124, "step": 6575 }, { "epoch": 1.8, "grad_norm": 2.7431740139869394, "learning_rate": 3.665737173615096e-06, "loss": 0.1235, "step": 6576 }, { "epoch": 1.8, "grad_norm": 1.9963003822918417, "learning_rate": 3.664316990372434e-06, "loss": 0.0999, "step": 6577 }, { "epoch": 1.8, "grad_norm": 1.6112830497399264, "learning_rate": 3.662896923159752e-06, "loss": 0.085, "step": 6578 }, { "epoch": 1.8, "grad_norm": 1.4831875313805127, "learning_rate": 3.6614769721004127e-06, "loss": 0.087, "step": 6579 }, { "epoch": 1.8, "grad_norm": 1.7635098423897353, "learning_rate": 3.6600571373177616e-06, "loss": 0.11, "step": 6580 }, { "epoch": 1.8, "grad_norm": 1.9707760650085377, "learning_rate": 3.658637418935146e-06, "loss": 0.1207, "step": 6581 }, { "epoch": 1.8, "grad_norm": 1.9438004794542514, "learning_rate": 3.6572178170758874e-06, "loss": 0.1049, "step": 6582 }, { "epoch": 1.8, "grad_norm": 1.5553485003266168, "learning_rate": 3.655798331863314e-06, "loss": 0.075, "step": 6583 }, { "epoch": 1.8, "grad_norm": 1.550515940894469, "learning_rate": 3.65437896342073e-06, "loss": 0.0997, "step": 6584 }, { "epoch": 1.8, "grad_norm": 1.6332365949418675, "learning_rate": 3.6529597118714377e-06, "loss": 0.0984, "step": 6585 }, { "epoch": 1.8, "grad_norm": 1.729818947015424, "learning_rate": 3.6515405773387257e-06, "loss": 0.1157, "step": 6586 }, { "epoch": 1.8, "grad_norm": 1.9000652838205647, "learning_rate": 3.650121559945874e-06, "loss": 0.1368, "step": 6587 }, { "epoch": 1.8, "grad_norm": 1.7485175813389107, "learning_rate": 3.648702659816149e-06, "loss": 0.1219, "step": 6588 }, { "epoch": 1.8, "grad_norm": 1.8980224493213316, "learning_rate": 3.647283877072815e-06, "loss": 0.1257, "step": 6589 }, { "epoch": 1.8, "grad_norm": 1.935666943375356, "learning_rate": 3.6458652118391164e-06, "loss": 0.1325, "step": 6590 }, { "epoch": 1.8, "grad_norm": 1.6782058162610771, "learning_rate": 3.644446664238294e-06, "loss": 0.1089, "step": 6591 }, { "epoch": 1.8, "grad_norm": 1.7103254299325488, "learning_rate": 3.6430282343935754e-06, "loss": 0.1062, "step": 6592 }, { "epoch": 1.8, "grad_norm": 1.69967685470295, "learning_rate": 3.6416099224281787e-06, "loss": 0.1137, "step": 6593 }, { "epoch": 1.8, "grad_norm": 1.5088365327281856, "learning_rate": 3.64019172846531e-06, "loss": 0.0913, "step": 6594 }, { "epoch": 1.8, "grad_norm": 1.7313580734523966, "learning_rate": 3.6387736526281714e-06, "loss": 0.1135, "step": 6595 }, { "epoch": 1.8, "grad_norm": 1.6469220829331832, "learning_rate": 3.637355695039947e-06, "loss": 0.1023, "step": 6596 }, { "epoch": 1.8, "grad_norm": 1.6179128832182075, "learning_rate": 3.6359378558238145e-06, "loss": 0.107, "step": 6597 }, { "epoch": 1.8, "grad_norm": 1.9003522616355872, "learning_rate": 3.634520135102941e-06, "loss": 0.1149, "step": 6598 }, { "epoch": 1.8, "grad_norm": 1.7485551759034508, "learning_rate": 3.6331025330004834e-06, "loss": 0.1171, "step": 6599 }, { "epoch": 1.8, "grad_norm": 1.7077247462811909, "learning_rate": 3.6316850496395863e-06, "loss": 0.1165, "step": 6600 }, { "epoch": 1.8, "grad_norm": 1.8310493916699688, "learning_rate": 3.630267685143388e-06, "loss": 0.1227, "step": 6601 }, { "epoch": 1.8, "grad_norm": 1.7876844188489902, "learning_rate": 3.628850439635012e-06, "loss": 0.1169, "step": 6602 }, { "epoch": 1.8, "grad_norm": 1.9918548636072828, "learning_rate": 3.627433313237576e-06, "loss": 0.1083, "step": 6603 }, { "epoch": 1.8, "grad_norm": 1.6044377292332295, "learning_rate": 3.6260163060741816e-06, "loss": 0.1004, "step": 6604 }, { "epoch": 1.8, "grad_norm": 1.5365220455039013, "learning_rate": 3.624599418267927e-06, "loss": 0.106, "step": 6605 }, { "epoch": 1.8, "grad_norm": 1.6148062073085345, "learning_rate": 3.623182649941892e-06, "loss": 0.1016, "step": 6606 }, { "epoch": 1.8, "grad_norm": 1.719216853740819, "learning_rate": 3.621766001219156e-06, "loss": 0.1117, "step": 6607 }, { "epoch": 1.8, "grad_norm": 1.4723967861150682, "learning_rate": 3.620349472222777e-06, "loss": 0.0869, "step": 6608 }, { "epoch": 1.8, "grad_norm": 1.7147562278307946, "learning_rate": 3.6189330630758124e-06, "loss": 0.116, "step": 6609 }, { "epoch": 1.8, "grad_norm": 1.5904777964315342, "learning_rate": 3.6175167739013018e-06, "loss": 0.1086, "step": 6610 }, { "epoch": 1.8, "grad_norm": 1.7210591185891906, "learning_rate": 3.616100604822279e-06, "loss": 0.1074, "step": 6611 }, { "epoch": 1.81, "grad_norm": 1.6278747572304295, "learning_rate": 3.6146845559617634e-06, "loss": 0.1015, "step": 6612 }, { "epoch": 1.81, "grad_norm": 1.7660752873198828, "learning_rate": 3.6132686274427695e-06, "loss": 0.1112, "step": 6613 }, { "epoch": 1.81, "grad_norm": 1.6511927942425266, "learning_rate": 3.6118528193882974e-06, "loss": 0.0967, "step": 6614 }, { "epoch": 1.81, "grad_norm": 1.4793212175717843, "learning_rate": 3.610437131921336e-06, "loss": 0.084, "step": 6615 }, { "epoch": 1.81, "grad_norm": 1.6749803063431985, "learning_rate": 3.6090215651648664e-06, "loss": 0.0923, "step": 6616 }, { "epoch": 1.81, "grad_norm": 1.6607795175037037, "learning_rate": 3.6076061192418582e-06, "loss": 0.0954, "step": 6617 }, { "epoch": 1.81, "grad_norm": 1.6957107538803322, "learning_rate": 3.6061907942752677e-06, "loss": 0.0998, "step": 6618 }, { "epoch": 1.81, "grad_norm": 1.6934360390074377, "learning_rate": 3.6047755903880478e-06, "loss": 0.1052, "step": 6619 }, { "epoch": 1.81, "grad_norm": 1.8880189780865968, "learning_rate": 3.603360507703133e-06, "loss": 0.1146, "step": 6620 }, { "epoch": 1.81, "grad_norm": 1.5854975026932092, "learning_rate": 3.601945546343453e-06, "loss": 0.1057, "step": 6621 }, { "epoch": 1.81, "grad_norm": 1.8044575713500113, "learning_rate": 3.600530706431922e-06, "loss": 0.1092, "step": 6622 }, { "epoch": 1.81, "grad_norm": 1.7912558113702912, "learning_rate": 3.599115988091449e-06, "loss": 0.1095, "step": 6623 }, { "epoch": 1.81, "grad_norm": 1.6965103998930002, "learning_rate": 3.5977013914449264e-06, "loss": 0.1094, "step": 6624 }, { "epoch": 1.81, "grad_norm": 1.744635719859434, "learning_rate": 3.596286916615244e-06, "loss": 0.1047, "step": 6625 }, { "epoch": 1.81, "grad_norm": 1.5825141605672404, "learning_rate": 3.5948725637252713e-06, "loss": 0.0978, "step": 6626 }, { "epoch": 1.81, "grad_norm": 1.8310636951831805, "learning_rate": 3.5934583328978766e-06, "loss": 0.1228, "step": 6627 }, { "epoch": 1.81, "grad_norm": 1.5778545050024169, "learning_rate": 3.5920442242559107e-06, "loss": 0.0991, "step": 6628 }, { "epoch": 1.81, "grad_norm": 1.6525629357444294, "learning_rate": 3.590630237922218e-06, "loss": 0.101, "step": 6629 }, { "epoch": 1.81, "grad_norm": 1.4814570503972715, "learning_rate": 3.5892163740196272e-06, "loss": 0.0959, "step": 6630 }, { "epoch": 1.81, "grad_norm": 1.8054704239368022, "learning_rate": 3.587802632670965e-06, "loss": 0.1148, "step": 6631 }, { "epoch": 1.81, "grad_norm": 1.6428554507862272, "learning_rate": 3.586389013999039e-06, "loss": 0.0906, "step": 6632 }, { "epoch": 1.81, "grad_norm": 1.507040253721973, "learning_rate": 3.584975518126648e-06, "loss": 0.0893, "step": 6633 }, { "epoch": 1.81, "grad_norm": 1.65860531247142, "learning_rate": 3.5835621451765866e-06, "loss": 0.116, "step": 6634 }, { "epoch": 1.81, "grad_norm": 1.4872547970542909, "learning_rate": 3.5821488952716286e-06, "loss": 0.0844, "step": 6635 }, { "epoch": 1.81, "grad_norm": 1.8842555276067297, "learning_rate": 3.5807357685345456e-06, "loss": 0.1347, "step": 6636 }, { "epoch": 1.81, "grad_norm": 1.896502281290437, "learning_rate": 3.5793227650880928e-06, "loss": 0.1314, "step": 6637 }, { "epoch": 1.81, "grad_norm": 1.6397942078319327, "learning_rate": 3.577909885055019e-06, "loss": 0.1094, "step": 6638 }, { "epoch": 1.81, "grad_norm": 1.6323810148116837, "learning_rate": 3.576497128558057e-06, "loss": 0.0978, "step": 6639 }, { "epoch": 1.81, "grad_norm": 1.721832034126878, "learning_rate": 3.575084495719937e-06, "loss": 0.1122, "step": 6640 }, { "epoch": 1.81, "grad_norm": 1.6700910304015377, "learning_rate": 3.573671986663368e-06, "loss": 0.103, "step": 6641 }, { "epoch": 1.81, "grad_norm": 1.524416676418242, "learning_rate": 3.572259601511058e-06, "loss": 0.0963, "step": 6642 }, { "epoch": 1.81, "grad_norm": 1.9774518984208158, "learning_rate": 3.570847340385698e-06, "loss": 0.1222, "step": 6643 }, { "epoch": 1.81, "grad_norm": 1.3925679723058886, "learning_rate": 3.569435203409972e-06, "loss": 0.0773, "step": 6644 }, { "epoch": 1.81, "grad_norm": 2.064218949827412, "learning_rate": 3.5680231907065487e-06, "loss": 0.1123, "step": 6645 }, { "epoch": 1.81, "grad_norm": 1.6749895464489748, "learning_rate": 3.566611302398093e-06, "loss": 0.1004, "step": 6646 }, { "epoch": 1.81, "grad_norm": 1.8976003279843356, "learning_rate": 3.565199538607249e-06, "loss": 0.1296, "step": 6647 }, { "epoch": 1.81, "grad_norm": 1.9483420294138387, "learning_rate": 3.5637878994566616e-06, "loss": 0.1182, "step": 6648 }, { "epoch": 1.82, "grad_norm": 1.8236215575821053, "learning_rate": 3.562376385068955e-06, "loss": 0.1196, "step": 6649 }, { "epoch": 1.82, "grad_norm": 1.9116193902494234, "learning_rate": 3.560964995566749e-06, "loss": 0.1234, "step": 6650 }, { "epoch": 1.82, "grad_norm": 1.942537623052391, "learning_rate": 3.559553731072648e-06, "loss": 0.1004, "step": 6651 }, { "epoch": 1.82, "grad_norm": 1.9468654041217217, "learning_rate": 3.5581425917092515e-06, "loss": 0.1177, "step": 6652 }, { "epoch": 1.82, "grad_norm": 1.5204536546123528, "learning_rate": 3.5567315775991384e-06, "loss": 0.0888, "step": 6653 }, { "epoch": 1.82, "grad_norm": 1.569417922314605, "learning_rate": 3.555320688864889e-06, "loss": 0.0987, "step": 6654 }, { "epoch": 1.82, "grad_norm": 1.449190496989596, "learning_rate": 3.5539099256290616e-06, "loss": 0.0876, "step": 6655 }, { "epoch": 1.82, "grad_norm": 1.610140858560358, "learning_rate": 3.5524992880142118e-06, "loss": 0.1037, "step": 6656 }, { "epoch": 1.82, "grad_norm": 1.497627545502157, "learning_rate": 3.5510887761428764e-06, "loss": 0.0956, "step": 6657 }, { "epoch": 1.82, "grad_norm": 1.8890554553883838, "learning_rate": 3.549678390137592e-06, "loss": 0.1276, "step": 6658 }, { "epoch": 1.82, "grad_norm": 2.1464939030328303, "learning_rate": 3.548268130120871e-06, "loss": 0.1447, "step": 6659 }, { "epoch": 1.82, "grad_norm": 1.8510713925067024, "learning_rate": 3.5468579962152272e-06, "loss": 0.1277, "step": 6660 }, { "epoch": 1.82, "grad_norm": 1.6886607335146475, "learning_rate": 3.545447988543156e-06, "loss": 0.1196, "step": 6661 }, { "epoch": 1.82, "grad_norm": 1.6459459642977876, "learning_rate": 3.5440381072271447e-06, "loss": 0.0989, "step": 6662 }, { "epoch": 1.82, "grad_norm": 1.6308313115987418, "learning_rate": 3.5426283523896675e-06, "loss": 0.1003, "step": 6663 }, { "epoch": 1.82, "grad_norm": 1.714758082652541, "learning_rate": 3.5412187241531904e-06, "loss": 0.1053, "step": 6664 }, { "epoch": 1.82, "grad_norm": 1.8486763781642428, "learning_rate": 3.5398092226401644e-06, "loss": 0.1256, "step": 6665 }, { "epoch": 1.82, "grad_norm": 1.7317898142549648, "learning_rate": 3.5383998479730357e-06, "loss": 0.1101, "step": 6666 }, { "epoch": 1.82, "grad_norm": 1.6293508911559902, "learning_rate": 3.5369906002742332e-06, "loss": 0.1054, "step": 6667 }, { "epoch": 1.82, "grad_norm": 1.502620274140018, "learning_rate": 3.535581479666179e-06, "loss": 0.0952, "step": 6668 }, { "epoch": 1.82, "grad_norm": 1.8571531643163228, "learning_rate": 3.5341724862712817e-06, "loss": 0.1194, "step": 6669 }, { "epoch": 1.82, "grad_norm": 1.529365441293258, "learning_rate": 3.5327636202119404e-06, "loss": 0.1022, "step": 6670 }, { "epoch": 1.82, "grad_norm": 2.5997098329975605, "learning_rate": 3.531354881610539e-06, "loss": 0.132, "step": 6671 }, { "epoch": 1.82, "grad_norm": 2.0634077512790037, "learning_rate": 3.5299462705894598e-06, "loss": 0.1277, "step": 6672 }, { "epoch": 1.82, "grad_norm": 1.712346021264327, "learning_rate": 3.5285377872710634e-06, "loss": 0.1109, "step": 6673 }, { "epoch": 1.82, "grad_norm": 1.8082908928594958, "learning_rate": 3.5271294317777065e-06, "loss": 0.1143, "step": 6674 }, { "epoch": 1.82, "grad_norm": 1.7496781001347508, "learning_rate": 3.5257212042317302e-06, "loss": 0.1019, "step": 6675 }, { "epoch": 1.82, "grad_norm": 1.8523397397966093, "learning_rate": 3.524313104755468e-06, "loss": 0.1016, "step": 6676 }, { "epoch": 1.82, "grad_norm": 1.8880953859232175, "learning_rate": 3.522905133471237e-06, "loss": 0.1141, "step": 6677 }, { "epoch": 1.82, "grad_norm": 1.7093372756155623, "learning_rate": 3.5214972905013522e-06, "loss": 0.1093, "step": 6678 }, { "epoch": 1.82, "grad_norm": 1.7808568283203687, "learning_rate": 3.5200895759681086e-06, "loss": 0.1163, "step": 6679 }, { "epoch": 1.82, "grad_norm": 1.6069185492716866, "learning_rate": 3.518681989993795e-06, "loss": 0.0817, "step": 6680 }, { "epoch": 1.82, "grad_norm": 1.7441918794042037, "learning_rate": 3.517274532700686e-06, "loss": 0.1067, "step": 6681 }, { "epoch": 1.82, "grad_norm": 1.7518825389278112, "learning_rate": 3.5158672042110485e-06, "loss": 0.1147, "step": 6682 }, { "epoch": 1.82, "grad_norm": 1.6563856845623466, "learning_rate": 3.5144600046471338e-06, "loss": 0.0844, "step": 6683 }, { "epoch": 1.82, "grad_norm": 1.72670655235064, "learning_rate": 3.513052934131188e-06, "loss": 0.0914, "step": 6684 }, { "epoch": 1.83, "grad_norm": 1.8156312178897744, "learning_rate": 3.5116459927854383e-06, "loss": 0.1162, "step": 6685 }, { "epoch": 1.83, "grad_norm": 1.6438493316017246, "learning_rate": 3.510239180732109e-06, "loss": 0.1042, "step": 6686 }, { "epoch": 1.83, "grad_norm": 1.8100702720265167, "learning_rate": 3.5088324980934063e-06, "loss": 0.1193, "step": 6687 }, { "epoch": 1.83, "grad_norm": 2.1870433129210727, "learning_rate": 3.507425944991529e-06, "loss": 0.1472, "step": 6688 }, { "epoch": 1.83, "grad_norm": 1.723683519529843, "learning_rate": 3.506019521548661e-06, "loss": 0.1187, "step": 6689 }, { "epoch": 1.83, "grad_norm": 1.7641032390605524, "learning_rate": 3.5046132278869817e-06, "loss": 0.1153, "step": 6690 }, { "epoch": 1.83, "grad_norm": 1.7268412598914904, "learning_rate": 3.503207064128652e-06, "loss": 0.1052, "step": 6691 }, { "epoch": 1.83, "grad_norm": 1.819943604247568, "learning_rate": 3.501801030395826e-06, "loss": 0.1065, "step": 6692 }, { "epoch": 1.83, "grad_norm": 1.5897525342271133, "learning_rate": 3.5003951268106434e-06, "loss": 0.1057, "step": 6693 }, { "epoch": 1.83, "grad_norm": 1.6509375702566622, "learning_rate": 3.498989353495236e-06, "loss": 0.1103, "step": 6694 }, { "epoch": 1.83, "grad_norm": 1.517911308973224, "learning_rate": 3.4975837105717203e-06, "loss": 0.0984, "step": 6695 }, { "epoch": 1.83, "grad_norm": 1.4985240952344228, "learning_rate": 3.496178198162207e-06, "loss": 0.0952, "step": 6696 }, { "epoch": 1.83, "grad_norm": 1.7303422948099, "learning_rate": 3.4947728163887886e-06, "loss": 0.1083, "step": 6697 }, { "epoch": 1.83, "grad_norm": 1.5751225801727782, "learning_rate": 3.493367565373552e-06, "loss": 0.0896, "step": 6698 }, { "epoch": 1.83, "grad_norm": 1.713480744981879, "learning_rate": 3.491962445238569e-06, "loss": 0.108, "step": 6699 }, { "epoch": 1.83, "grad_norm": 1.8205060359609513, "learning_rate": 3.490557456105904e-06, "loss": 0.1178, "step": 6700 }, { "epoch": 1.83, "grad_norm": 1.8315003704488404, "learning_rate": 3.4891525980976034e-06, "loss": 0.0991, "step": 6701 }, { "epoch": 1.83, "grad_norm": 1.5252380469899487, "learning_rate": 3.4877478713357103e-06, "loss": 0.0875, "step": 6702 }, { "epoch": 1.83, "grad_norm": 1.567304591782693, "learning_rate": 3.4863432759422512e-06, "loss": 0.1016, "step": 6703 }, { "epoch": 1.83, "grad_norm": 1.6834994019410636, "learning_rate": 3.4849388120392422e-06, "loss": 0.1112, "step": 6704 }, { "epoch": 1.83, "grad_norm": 1.6498090709397835, "learning_rate": 3.483534479748688e-06, "loss": 0.1102, "step": 6705 }, { "epoch": 1.83, "grad_norm": 1.7217256942227208, "learning_rate": 3.482130279192584e-06, "loss": 0.1125, "step": 6706 }, { "epoch": 1.83, "grad_norm": 1.6065673306333461, "learning_rate": 3.4807262104929075e-06, "loss": 0.0936, "step": 6707 }, { "epoch": 1.83, "grad_norm": 1.6695670649788883, "learning_rate": 3.479322273771635e-06, "loss": 0.108, "step": 6708 }, { "epoch": 1.83, "grad_norm": 1.7062090325701962, "learning_rate": 3.4779184691507216e-06, "loss": 0.0953, "step": 6709 }, { "epoch": 1.83, "grad_norm": 1.7483330215077921, "learning_rate": 3.4765147967521174e-06, "loss": 0.1201, "step": 6710 }, { "epoch": 1.83, "grad_norm": 1.7602761782172296, "learning_rate": 3.4751112566977563e-06, "loss": 0.1014, "step": 6711 }, { "epoch": 1.83, "grad_norm": 1.8437897018666467, "learning_rate": 3.4737078491095657e-06, "loss": 0.1259, "step": 6712 }, { "epoch": 1.83, "grad_norm": 1.9879516369433305, "learning_rate": 3.4723045741094545e-06, "loss": 0.1167, "step": 6713 }, { "epoch": 1.83, "grad_norm": 1.761322537900197, "learning_rate": 3.4709014318193298e-06, "loss": 0.0986, "step": 6714 }, { "epoch": 1.83, "grad_norm": 1.7758254462963727, "learning_rate": 3.4694984223610774e-06, "loss": 0.105, "step": 6715 }, { "epoch": 1.83, "grad_norm": 1.9740879121199117, "learning_rate": 3.468095545856579e-06, "loss": 0.1246, "step": 6716 }, { "epoch": 1.83, "grad_norm": 1.7970209449196572, "learning_rate": 3.4666928024276993e-06, "loss": 0.0848, "step": 6717 }, { "epoch": 1.83, "grad_norm": 1.5823427681698838, "learning_rate": 3.4652901921962945e-06, "loss": 0.1068, "step": 6718 }, { "epoch": 1.83, "grad_norm": 1.649645613739333, "learning_rate": 3.4638877152842075e-06, "loss": 0.0922, "step": 6719 }, { "epoch": 1.83, "grad_norm": 1.7807371026905454, "learning_rate": 3.462485371813274e-06, "loss": 0.0992, "step": 6720 }, { "epoch": 1.83, "grad_norm": 1.601764657644291, "learning_rate": 3.461083161905311e-06, "loss": 0.095, "step": 6721 }, { "epoch": 1.84, "grad_norm": 1.7156968621413224, "learning_rate": 3.4596810856821304e-06, "loss": 0.104, "step": 6722 }, { "epoch": 1.84, "grad_norm": 1.7658327665849918, "learning_rate": 3.4582791432655273e-06, "loss": 0.1151, "step": 6723 }, { "epoch": 1.84, "grad_norm": 1.5504247168858252, "learning_rate": 3.45687733477729e-06, "loss": 0.0883, "step": 6724 }, { "epoch": 1.84, "grad_norm": 1.6160233594553108, "learning_rate": 3.4554756603391893e-06, "loss": 0.0916, "step": 6725 }, { "epoch": 1.84, "grad_norm": 1.8722890110879344, "learning_rate": 3.4540741200729903e-06, "loss": 0.1122, "step": 6726 }, { "epoch": 1.84, "grad_norm": 1.7485686337377087, "learning_rate": 3.4526727141004457e-06, "loss": 0.1199, "step": 6727 }, { "epoch": 1.84, "grad_norm": 1.880307211237019, "learning_rate": 3.45127144254329e-06, "loss": 0.1216, "step": 6728 }, { "epoch": 1.84, "grad_norm": 1.34255137706671, "learning_rate": 3.4498703055232575e-06, "loss": 0.0848, "step": 6729 }, { "epoch": 1.84, "grad_norm": 1.677612511180962, "learning_rate": 3.4484693031620563e-06, "loss": 0.1051, "step": 6730 }, { "epoch": 1.84, "grad_norm": 1.7683846101717453, "learning_rate": 3.447068435581398e-06, "loss": 0.0975, "step": 6731 }, { "epoch": 1.84, "grad_norm": 1.6382726809643489, "learning_rate": 3.4456677029029687e-06, "loss": 0.0961, "step": 6732 }, { "epoch": 1.84, "grad_norm": 1.5388093681534645, "learning_rate": 3.4442671052484545e-06, "loss": 0.0839, "step": 6733 }, { "epoch": 1.84, "grad_norm": 2.1401575872429013, "learning_rate": 3.4428666427395195e-06, "loss": 0.1392, "step": 6734 }, { "epoch": 1.84, "grad_norm": 1.5660715220995813, "learning_rate": 3.441466315497828e-06, "loss": 0.1029, "step": 6735 }, { "epoch": 1.84, "grad_norm": 1.9656216287487835, "learning_rate": 3.440066123645017e-06, "loss": 0.1153, "step": 6736 }, { "epoch": 1.84, "grad_norm": 1.6102068054946224, "learning_rate": 3.4386660673027267e-06, "loss": 0.0984, "step": 6737 }, { "epoch": 1.84, "grad_norm": 1.824043807478563, "learning_rate": 3.437266146592576e-06, "loss": 0.1075, "step": 6738 }, { "epoch": 1.84, "grad_norm": 1.5960429365643805, "learning_rate": 3.4358663616361775e-06, "loss": 0.0987, "step": 6739 }, { "epoch": 1.84, "grad_norm": 1.7422951708239955, "learning_rate": 3.434466712555128e-06, "loss": 0.0992, "step": 6740 }, { "epoch": 1.84, "grad_norm": 1.527141800737443, "learning_rate": 3.433067199471015e-06, "loss": 0.0958, "step": 6741 }, { "epoch": 1.84, "grad_norm": 1.8840447194428962, "learning_rate": 3.4316678225054106e-06, "loss": 0.1184, "step": 6742 }, { "epoch": 1.84, "grad_norm": 1.7874758430607982, "learning_rate": 3.430268581779883e-06, "loss": 0.1135, "step": 6743 }, { "epoch": 1.84, "grad_norm": 1.6386548939184866, "learning_rate": 3.428869477415979e-06, "loss": 0.1063, "step": 6744 }, { "epoch": 1.84, "grad_norm": 1.6393504532572505, "learning_rate": 3.427470509535241e-06, "loss": 0.0927, "step": 6745 }, { "epoch": 1.84, "grad_norm": 1.5403534182437106, "learning_rate": 3.4260716782591934e-06, "loss": 0.0864, "step": 6746 }, { "epoch": 1.84, "grad_norm": 1.6084930617423232, "learning_rate": 3.424672983709355e-06, "loss": 0.1006, "step": 6747 }, { "epoch": 1.84, "grad_norm": 1.730454347575827, "learning_rate": 3.423274426007226e-06, "loss": 0.0989, "step": 6748 }, { "epoch": 1.84, "grad_norm": 1.788461589945982, "learning_rate": 3.4218760052743018e-06, "loss": 0.1162, "step": 6749 }, { "epoch": 1.84, "grad_norm": 1.676860688678788, "learning_rate": 3.4204777216320607e-06, "loss": 0.1047, "step": 6750 }, { "epoch": 1.84, "grad_norm": 1.6787575061921869, "learning_rate": 3.4190795752019713e-06, "loss": 0.0912, "step": 6751 }, { "epoch": 1.84, "grad_norm": 1.6940592382887492, "learning_rate": 3.4176815661054884e-06, "loss": 0.1123, "step": 6752 }, { "epoch": 1.84, "grad_norm": 1.6587299778761608, "learning_rate": 3.416283694464058e-06, "loss": 0.1064, "step": 6753 }, { "epoch": 1.84, "grad_norm": 1.5874451683595527, "learning_rate": 3.41488596039911e-06, "loss": 0.1006, "step": 6754 }, { "epoch": 1.84, "grad_norm": 1.5552910395266073, "learning_rate": 3.413488364032068e-06, "loss": 0.0989, "step": 6755 }, { "epoch": 1.84, "grad_norm": 1.6974333100605856, "learning_rate": 3.4120909054843375e-06, "loss": 0.1116, "step": 6756 }, { "epoch": 1.84, "grad_norm": 1.6456892862782926, "learning_rate": 3.410693584877317e-06, "loss": 0.0986, "step": 6757 }, { "epoch": 1.84, "grad_norm": 1.7511445853784662, "learning_rate": 3.4092964023323893e-06, "loss": 0.1107, "step": 6758 }, { "epoch": 1.85, "grad_norm": 1.5106536993423596, "learning_rate": 3.4078993579709286e-06, "loss": 0.0871, "step": 6759 }, { "epoch": 1.85, "grad_norm": 1.7754188949103504, "learning_rate": 3.406502451914292e-06, "loss": 0.1148, "step": 6760 }, { "epoch": 1.85, "grad_norm": 1.825775282221653, "learning_rate": 3.4051056842838315e-06, "loss": 0.1201, "step": 6761 }, { "epoch": 1.85, "grad_norm": 1.7339885427687431, "learning_rate": 3.403709055200881e-06, "loss": 0.1053, "step": 6762 }, { "epoch": 1.85, "grad_norm": 1.6463951494161604, "learning_rate": 3.4023125647867673e-06, "loss": 0.102, "step": 6763 }, { "epoch": 1.85, "grad_norm": 1.3877564016938142, "learning_rate": 3.4009162131628e-06, "loss": 0.0856, "step": 6764 }, { "epoch": 1.85, "grad_norm": 1.7386206191211375, "learning_rate": 3.3995200004502814e-06, "loss": 0.106, "step": 6765 }, { "epoch": 1.85, "grad_norm": 1.9656627117902095, "learning_rate": 3.398123926770497e-06, "loss": 0.1283, "step": 6766 }, { "epoch": 1.85, "grad_norm": 1.6283524778107452, "learning_rate": 3.396727992244726e-06, "loss": 0.1059, "step": 6767 }, { "epoch": 1.85, "grad_norm": 1.6808075951216175, "learning_rate": 3.395332196994231e-06, "loss": 0.1034, "step": 6768 }, { "epoch": 1.85, "grad_norm": 1.7202104174475907, "learning_rate": 3.393936541140264e-06, "loss": 0.1025, "step": 6769 }, { "epoch": 1.85, "grad_norm": 1.674657219966489, "learning_rate": 3.3925410248040645e-06, "loss": 0.0938, "step": 6770 }, { "epoch": 1.85, "grad_norm": 1.9521902291646216, "learning_rate": 3.3911456481068613e-06, "loss": 0.1305, "step": 6771 }, { "epoch": 1.85, "grad_norm": 1.7114943318200078, "learning_rate": 3.3897504111698665e-06, "loss": 0.1096, "step": 6772 }, { "epoch": 1.85, "grad_norm": 1.6001913104656171, "learning_rate": 3.3883553141142884e-06, "loss": 0.1043, "step": 6773 }, { "epoch": 1.85, "grad_norm": 1.5947898853341833, "learning_rate": 3.386960357061315e-06, "loss": 0.0863, "step": 6774 }, { "epoch": 1.85, "grad_norm": 1.8351455715428466, "learning_rate": 3.3855655401321267e-06, "loss": 0.1116, "step": 6775 }, { "epoch": 1.85, "grad_norm": 1.8376204098320752, "learning_rate": 3.38417086344789e-06, "loss": 0.1259, "step": 6776 }, { "epoch": 1.85, "grad_norm": 1.8467105437924536, "learning_rate": 3.3827763271297598e-06, "loss": 0.1138, "step": 6777 }, { "epoch": 1.85, "grad_norm": 1.7156743946748028, "learning_rate": 3.381381931298876e-06, "loss": 0.1122, "step": 6778 }, { "epoch": 1.85, "grad_norm": 1.6827558133408522, "learning_rate": 3.379987676076374e-06, "loss": 0.1089, "step": 6779 }, { "epoch": 1.85, "grad_norm": 1.725544625964348, "learning_rate": 3.378593561583368e-06, "loss": 0.1059, "step": 6780 }, { "epoch": 1.85, "grad_norm": 1.7748463688601241, "learning_rate": 3.3771995879409663e-06, "loss": 0.114, "step": 6781 }, { "epoch": 1.85, "grad_norm": 1.604328116183872, "learning_rate": 3.3758057552702604e-06, "loss": 0.0968, "step": 6782 }, { "epoch": 1.85, "grad_norm": 1.5000926943483102, "learning_rate": 3.374412063692334e-06, "loss": 0.0887, "step": 6783 }, { "epoch": 1.85, "grad_norm": 1.6050507388338577, "learning_rate": 3.3730185133282522e-06, "loss": 0.0843, "step": 6784 }, { "epoch": 1.85, "grad_norm": 1.481865550066402, "learning_rate": 3.3716251042990772e-06, "loss": 0.0858, "step": 6785 }, { "epoch": 1.85, "grad_norm": 1.4953701854721975, "learning_rate": 3.3702318367258503e-06, "loss": 0.0803, "step": 6786 }, { "epoch": 1.85, "grad_norm": 1.6274610969114172, "learning_rate": 3.368838710729605e-06, "loss": 0.1098, "step": 6787 }, { "epoch": 1.85, "grad_norm": 1.7689381443687886, "learning_rate": 3.36744572643136e-06, "loss": 0.1238, "step": 6788 }, { "epoch": 1.85, "grad_norm": 1.658357837517485, "learning_rate": 3.3660528839521245e-06, "loss": 0.1002, "step": 6789 }, { "epoch": 1.85, "grad_norm": 1.7680712754961638, "learning_rate": 3.3646601834128924e-06, "loss": 0.1287, "step": 6790 }, { "epoch": 1.85, "grad_norm": 1.6905441531854648, "learning_rate": 3.3632676249346487e-06, "loss": 0.096, "step": 6791 }, { "epoch": 1.85, "grad_norm": 1.496392918270836, "learning_rate": 3.361875208638362e-06, "loss": 0.0842, "step": 6792 }, { "epoch": 1.85, "grad_norm": 1.9876049722721103, "learning_rate": 3.360482934644993e-06, "loss": 0.1219, "step": 6793 }, { "epoch": 1.85, "grad_norm": 1.706428525564243, "learning_rate": 3.3590908030754854e-06, "loss": 0.1087, "step": 6794 }, { "epoch": 1.86, "grad_norm": 1.7055572553450906, "learning_rate": 3.3576988140507747e-06, "loss": 0.1154, "step": 6795 }, { "epoch": 1.86, "grad_norm": 1.7494640154157222, "learning_rate": 3.3563069676917798e-06, "loss": 0.1033, "step": 6796 }, { "epoch": 1.86, "grad_norm": 2.0180819105617163, "learning_rate": 3.3549152641194127e-06, "loss": 0.116, "step": 6797 }, { "epoch": 1.86, "grad_norm": 1.8299503947169884, "learning_rate": 3.3535237034545677e-06, "loss": 0.0998, "step": 6798 }, { "epoch": 1.86, "grad_norm": 1.507241111723557, "learning_rate": 3.3521322858181294e-06, "loss": 0.0833, "step": 6799 }, { "epoch": 1.86, "grad_norm": 1.8878963576144874, "learning_rate": 3.350741011330969e-06, "loss": 0.1244, "step": 6800 }, { "epoch": 1.86, "grad_norm": 1.6785979482293591, "learning_rate": 3.3493498801139466e-06, "loss": 0.1002, "step": 6801 }, { "epoch": 1.86, "grad_norm": 1.8639060323024437, "learning_rate": 3.347958892287907e-06, "loss": 0.1313, "step": 6802 }, { "epoch": 1.86, "grad_norm": 1.8046891470179227, "learning_rate": 3.3465680479736878e-06, "loss": 0.1116, "step": 6803 }, { "epoch": 1.86, "grad_norm": 1.8955289557663788, "learning_rate": 3.345177347292108e-06, "loss": 0.1079, "step": 6804 }, { "epoch": 1.86, "grad_norm": 1.8798308865742541, "learning_rate": 3.3437867903639787e-06, "loss": 0.124, "step": 6805 }, { "epoch": 1.86, "grad_norm": 1.6528085084593902, "learning_rate": 3.3423963773100944e-06, "loss": 0.1051, "step": 6806 }, { "epoch": 1.86, "grad_norm": 1.580552815884661, "learning_rate": 3.3410061082512422e-06, "loss": 0.0982, "step": 6807 }, { "epoch": 1.86, "grad_norm": 1.86509479680996, "learning_rate": 3.3396159833081902e-06, "loss": 0.1157, "step": 6808 }, { "epoch": 1.86, "grad_norm": 1.8735586597079534, "learning_rate": 3.3382260026017027e-06, "loss": 0.1102, "step": 6809 }, { "epoch": 1.86, "grad_norm": 1.6709029130553332, "learning_rate": 3.3368361662525226e-06, "loss": 0.11, "step": 6810 }, { "epoch": 1.86, "grad_norm": 1.7001275565236607, "learning_rate": 3.3354464743813864e-06, "loss": 0.121, "step": 6811 }, { "epoch": 1.86, "grad_norm": 1.6119000328540138, "learning_rate": 3.3340569271090145e-06, "loss": 0.1114, "step": 6812 }, { "epoch": 1.86, "grad_norm": 1.6104322547194587, "learning_rate": 3.3326675245561167e-06, "loss": 0.0895, "step": 6813 }, { "epoch": 1.86, "grad_norm": 1.719466972673618, "learning_rate": 3.331278266843388e-06, "loss": 0.1024, "step": 6814 }, { "epoch": 1.86, "grad_norm": 2.0418920781174905, "learning_rate": 3.329889154091515e-06, "loss": 0.1202, "step": 6815 }, { "epoch": 1.86, "grad_norm": 1.47751345716607, "learning_rate": 3.3285001864211672e-06, "loss": 0.0898, "step": 6816 }, { "epoch": 1.86, "grad_norm": 1.55658568438265, "learning_rate": 3.327111363953005e-06, "loss": 0.1079, "step": 6817 }, { "epoch": 1.86, "grad_norm": 1.7886776349829745, "learning_rate": 3.325722686807672e-06, "loss": 0.1151, "step": 6818 }, { "epoch": 1.86, "grad_norm": 1.9101037533608973, "learning_rate": 3.324334155105803e-06, "loss": 0.1069, "step": 6819 }, { "epoch": 1.86, "grad_norm": 1.9207785469052485, "learning_rate": 3.322945768968021e-06, "loss": 0.1133, "step": 6820 }, { "epoch": 1.86, "grad_norm": 1.6463705587351587, "learning_rate": 3.321557528514931e-06, "loss": 0.1083, "step": 6821 }, { "epoch": 1.86, "grad_norm": 2.1152200369863676, "learning_rate": 3.3201694338671313e-06, "loss": 0.105, "step": 6822 }, { "epoch": 1.86, "grad_norm": 1.8677461396172759, "learning_rate": 3.3187814851452026e-06, "loss": 0.1142, "step": 6823 }, { "epoch": 1.86, "grad_norm": 1.4222431743836528, "learning_rate": 3.3173936824697174e-06, "loss": 0.084, "step": 6824 }, { "epoch": 1.86, "grad_norm": 1.7380465707245294, "learning_rate": 3.3160060259612298e-06, "loss": 0.1171, "step": 6825 }, { "epoch": 1.86, "grad_norm": 1.862013647316006, "learning_rate": 3.314618515740289e-06, "loss": 0.1146, "step": 6826 }, { "epoch": 1.86, "grad_norm": 1.7160244788871146, "learning_rate": 3.313231151927424e-06, "loss": 0.1057, "step": 6827 }, { "epoch": 1.86, "grad_norm": 1.7265476436506584, "learning_rate": 3.311843934643157e-06, "loss": 0.106, "step": 6828 }, { "epoch": 1.86, "grad_norm": 1.600374850850436, "learning_rate": 3.3104568640079915e-06, "loss": 0.0989, "step": 6829 }, { "epoch": 1.86, "grad_norm": 1.7268363061930119, "learning_rate": 3.3090699401424244e-06, "loss": 0.1006, "step": 6830 }, { "epoch": 1.86, "grad_norm": 1.5495903443835775, "learning_rate": 3.307683163166934e-06, "loss": 0.1, "step": 6831 }, { "epoch": 1.87, "grad_norm": 1.814542023946377, "learning_rate": 3.306296533201992e-06, "loss": 0.1161, "step": 6832 }, { "epoch": 1.87, "grad_norm": 1.6512762212964869, "learning_rate": 3.3049100503680516e-06, "loss": 0.1111, "step": 6833 }, { "epoch": 1.87, "grad_norm": 1.6552681381415215, "learning_rate": 3.3035237147855575e-06, "loss": 0.1064, "step": 6834 }, { "epoch": 1.87, "grad_norm": 1.56218545296008, "learning_rate": 3.3021375265749385e-06, "loss": 0.0964, "step": 6835 }, { "epoch": 1.87, "grad_norm": 1.8150909131616568, "learning_rate": 3.300751485856613e-06, "loss": 0.1127, "step": 6836 }, { "epoch": 1.87, "grad_norm": 1.7825674061133077, "learning_rate": 3.299365592750984e-06, "loss": 0.1173, "step": 6837 }, { "epoch": 1.87, "grad_norm": 1.4670223102223359, "learning_rate": 3.2979798473784453e-06, "loss": 0.0791, "step": 6838 }, { "epoch": 1.87, "grad_norm": 1.6969598997128585, "learning_rate": 3.2965942498593735e-06, "loss": 0.1034, "step": 6839 }, { "epoch": 1.87, "grad_norm": 1.6801289997806028, "learning_rate": 3.295208800314137e-06, "loss": 0.1118, "step": 6840 }, { "epoch": 1.87, "grad_norm": 1.8444462937226822, "learning_rate": 3.293823498863087e-06, "loss": 0.1251, "step": 6841 }, { "epoch": 1.87, "grad_norm": 1.4389994226722203, "learning_rate": 3.292438345626565e-06, "loss": 0.0846, "step": 6842 }, { "epoch": 1.87, "grad_norm": 1.6907247443520161, "learning_rate": 3.2910533407248966e-06, "loss": 0.0992, "step": 6843 }, { "epoch": 1.87, "grad_norm": 2.0136715774701583, "learning_rate": 3.2896684842784e-06, "loss": 0.1333, "step": 6844 }, { "epoch": 1.87, "grad_norm": 1.5303895930962945, "learning_rate": 3.288283776407373e-06, "loss": 0.0959, "step": 6845 }, { "epoch": 1.87, "grad_norm": 1.8727724056795176, "learning_rate": 3.2868992172321068e-06, "loss": 0.1241, "step": 6846 }, { "epoch": 1.87, "grad_norm": 1.5475972304417096, "learning_rate": 3.2855148068728753e-06, "loss": 0.1123, "step": 6847 }, { "epoch": 1.87, "grad_norm": 1.9063395517175663, "learning_rate": 3.284130545449944e-06, "loss": 0.1038, "step": 6848 }, { "epoch": 1.87, "grad_norm": 1.735991127323755, "learning_rate": 3.282746433083559e-06, "loss": 0.1107, "step": 6849 }, { "epoch": 1.87, "grad_norm": 1.598154349493814, "learning_rate": 3.2813624698939617e-06, "loss": 0.1018, "step": 6850 }, { "epoch": 1.87, "grad_norm": 1.8207927539107558, "learning_rate": 3.279978656001373e-06, "loss": 0.1133, "step": 6851 }, { "epoch": 1.87, "grad_norm": 1.5615855127088596, "learning_rate": 3.278594991526006e-06, "loss": 0.092, "step": 6852 }, { "epoch": 1.87, "grad_norm": 1.4163635610115943, "learning_rate": 3.277211476588057e-06, "loss": 0.0855, "step": 6853 }, { "epoch": 1.87, "grad_norm": 1.6170113768243384, "learning_rate": 3.2758281113077127e-06, "loss": 0.0965, "step": 6854 }, { "epoch": 1.87, "grad_norm": 1.8171888820318796, "learning_rate": 3.2744448958051428e-06, "loss": 0.1197, "step": 6855 }, { "epoch": 1.87, "grad_norm": 1.933727587063898, "learning_rate": 3.2730618302005104e-06, "loss": 0.1261, "step": 6856 }, { "epoch": 1.87, "grad_norm": 1.5126104068440398, "learning_rate": 3.2716789146139573e-06, "loss": 0.0843, "step": 6857 }, { "epoch": 1.87, "grad_norm": 1.8983530562537037, "learning_rate": 3.2702961491656197e-06, "loss": 0.1221, "step": 6858 }, { "epoch": 1.87, "grad_norm": 1.9751572939693205, "learning_rate": 3.2689135339756155e-06, "loss": 0.1056, "step": 6859 }, { "epoch": 1.87, "grad_norm": 1.7397171724403664, "learning_rate": 3.2675310691640538e-06, "loss": 0.1169, "step": 6860 }, { "epoch": 1.87, "grad_norm": 1.550915514810408, "learning_rate": 3.266148754851025e-06, "loss": 0.0931, "step": 6861 }, { "epoch": 1.87, "grad_norm": 1.6633387563643878, "learning_rate": 3.2647665911566144e-06, "loss": 0.0987, "step": 6862 }, { "epoch": 1.87, "grad_norm": 1.7180240015595727, "learning_rate": 3.2633845782008867e-06, "loss": 0.1077, "step": 6863 }, { "epoch": 1.87, "grad_norm": 1.5671731229007753, "learning_rate": 3.2620027161038975e-06, "loss": 0.1044, "step": 6864 }, { "epoch": 1.87, "grad_norm": 1.6017729930546374, "learning_rate": 3.2606210049856877e-06, "loss": 0.1127, "step": 6865 }, { "epoch": 1.87, "grad_norm": 1.871195901785977, "learning_rate": 3.2592394449662867e-06, "loss": 0.1157, "step": 6866 }, { "epoch": 1.87, "grad_norm": 1.6457069842527312, "learning_rate": 3.2578580361657076e-06, "loss": 0.1132, "step": 6867 }, { "epoch": 1.87, "grad_norm": 1.76728088189756, "learning_rate": 3.2564767787039563e-06, "loss": 0.1192, "step": 6868 }, { "epoch": 1.88, "grad_norm": 1.7373662832248904, "learning_rate": 3.2550956727010184e-06, "loss": 0.1174, "step": 6869 }, { "epoch": 1.88, "grad_norm": 1.7081058608431943, "learning_rate": 3.2537147182768723e-06, "loss": 0.1, "step": 6870 }, { "epoch": 1.88, "grad_norm": 1.9868797112812537, "learning_rate": 3.2523339155514787e-06, "loss": 0.1186, "step": 6871 }, { "epoch": 1.88, "grad_norm": 1.6118508883446636, "learning_rate": 3.2509532646447883e-06, "loss": 0.0981, "step": 6872 }, { "epoch": 1.88, "grad_norm": 1.8292972866024735, "learning_rate": 3.2495727656767353e-06, "loss": 0.1091, "step": 6873 }, { "epoch": 1.88, "grad_norm": 1.7001894485427724, "learning_rate": 3.2481924187672466e-06, "loss": 0.0975, "step": 6874 }, { "epoch": 1.88, "grad_norm": 1.9354283960524175, "learning_rate": 3.2468122240362287e-06, "loss": 0.1206, "step": 6875 }, { "epoch": 1.88, "grad_norm": 1.5962509053364602, "learning_rate": 3.2454321816035805e-06, "loss": 0.0875, "step": 6876 }, { "epoch": 1.88, "grad_norm": 1.7213833141442425, "learning_rate": 3.2440522915891837e-06, "loss": 0.1254, "step": 6877 }, { "epoch": 1.88, "grad_norm": 1.7155166775442827, "learning_rate": 3.24267255411291e-06, "loss": 0.1073, "step": 6878 }, { "epoch": 1.88, "grad_norm": 1.862891458898532, "learning_rate": 3.2412929692946137e-06, "loss": 0.1141, "step": 6879 }, { "epoch": 1.88, "grad_norm": 1.7096947054374239, "learning_rate": 3.239913537254143e-06, "loss": 0.1127, "step": 6880 }, { "epoch": 1.88, "grad_norm": 1.6691562203007395, "learning_rate": 3.2385342581113242e-06, "loss": 0.1079, "step": 6881 }, { "epoch": 1.88, "grad_norm": 1.4631399596596462, "learning_rate": 3.2371551319859778e-06, "loss": 0.1017, "step": 6882 }, { "epoch": 1.88, "grad_norm": 1.6755398241893824, "learning_rate": 3.235776158997904e-06, "loss": 0.1084, "step": 6883 }, { "epoch": 1.88, "grad_norm": 1.6435249147301128, "learning_rate": 3.2343973392668976e-06, "loss": 0.1002, "step": 6884 }, { "epoch": 1.88, "grad_norm": 1.4647376928237656, "learning_rate": 3.233018672912731e-06, "loss": 0.0967, "step": 6885 }, { "epoch": 1.88, "grad_norm": 1.872224585131727, "learning_rate": 3.231640160055172e-06, "loss": 0.1149, "step": 6886 }, { "epoch": 1.88, "grad_norm": 1.8777079857247378, "learning_rate": 3.2302618008139696e-06, "loss": 0.1196, "step": 6887 }, { "epoch": 1.88, "grad_norm": 1.7160732762228272, "learning_rate": 3.228883595308862e-06, "loss": 0.116, "step": 6888 }, { "epoch": 1.88, "grad_norm": 1.5493231838412793, "learning_rate": 3.2275055436595713e-06, "loss": 0.0995, "step": 6889 }, { "epoch": 1.88, "grad_norm": 1.9442931432806336, "learning_rate": 3.2261276459858105e-06, "loss": 0.1031, "step": 6890 }, { "epoch": 1.88, "grad_norm": 1.7361892327631556, "learning_rate": 3.2247499024072727e-06, "loss": 0.1184, "step": 6891 }, { "epoch": 1.88, "grad_norm": 1.9338517786211538, "learning_rate": 3.223372313043647e-06, "loss": 0.1174, "step": 6892 }, { "epoch": 1.88, "grad_norm": 1.5935189432005834, "learning_rate": 3.221994878014599e-06, "loss": 0.0794, "step": 6893 }, { "epoch": 1.88, "grad_norm": 1.9029338360992318, "learning_rate": 3.2206175974397896e-06, "loss": 0.1089, "step": 6894 }, { "epoch": 1.88, "grad_norm": 1.7965669050756166, "learning_rate": 3.219240471438859e-06, "loss": 0.1104, "step": 6895 }, { "epoch": 1.88, "grad_norm": 1.816868620020627, "learning_rate": 3.21786350013144e-06, "loss": 0.1107, "step": 6896 }, { "epoch": 1.88, "grad_norm": 1.6820068858523178, "learning_rate": 3.216486683637146e-06, "loss": 0.0848, "step": 6897 }, { "epoch": 1.88, "grad_norm": 1.9882521225695604, "learning_rate": 3.2151100220755842e-06, "loss": 0.1233, "step": 6898 }, { "epoch": 1.88, "grad_norm": 1.4728983447933541, "learning_rate": 3.213733515566342e-06, "loss": 0.0807, "step": 6899 }, { "epoch": 1.88, "grad_norm": 1.7044523297856788, "learning_rate": 3.212357164228996e-06, "loss": 0.0871, "step": 6900 }, { "epoch": 1.88, "grad_norm": 1.8718722138930204, "learning_rate": 3.2109809681831084e-06, "loss": 0.128, "step": 6901 }, { "epoch": 1.88, "grad_norm": 1.6206777934234293, "learning_rate": 3.2096049275482306e-06, "loss": 0.087, "step": 6902 }, { "epoch": 1.88, "grad_norm": 1.8382214285601413, "learning_rate": 3.2082290424438945e-06, "loss": 0.0984, "step": 6903 }, { "epoch": 1.88, "grad_norm": 1.5410179782453917, "learning_rate": 3.2068533129896273e-06, "loss": 0.099, "step": 6904 }, { "epoch": 1.89, "grad_norm": 1.828132191117788, "learning_rate": 3.205477739304935e-06, "loss": 0.1089, "step": 6905 }, { "epoch": 1.89, "grad_norm": 1.7859253247322253, "learning_rate": 3.2041023215093135e-06, "loss": 0.1168, "step": 6906 }, { "epoch": 1.89, "grad_norm": 1.5848177211059267, "learning_rate": 3.2027270597222437e-06, "loss": 0.0877, "step": 6907 }, { "epoch": 1.89, "grad_norm": 1.8894989462765621, "learning_rate": 3.2013519540631954e-06, "loss": 0.101, "step": 6908 }, { "epoch": 1.89, "grad_norm": 1.5189238124892686, "learning_rate": 3.1999770046516198e-06, "loss": 0.0908, "step": 6909 }, { "epoch": 1.89, "grad_norm": 1.875630597933029, "learning_rate": 3.1986022116069625e-06, "loss": 0.1065, "step": 6910 }, { "epoch": 1.89, "grad_norm": 1.4857733602600187, "learning_rate": 3.1972275750486483e-06, "loss": 0.0918, "step": 6911 }, { "epoch": 1.89, "grad_norm": 1.6356902172707195, "learning_rate": 3.1958530950960908e-06, "loss": 0.0938, "step": 6912 }, { "epoch": 1.89, "grad_norm": 1.6060028659775716, "learning_rate": 3.194478771868693e-06, "loss": 0.1025, "step": 6913 }, { "epoch": 1.89, "grad_norm": 1.68595767488497, "learning_rate": 3.1931046054858366e-06, "loss": 0.1056, "step": 6914 }, { "epoch": 1.89, "grad_norm": 1.6444308021429594, "learning_rate": 3.1917305960669e-06, "loss": 0.1037, "step": 6915 }, { "epoch": 1.89, "grad_norm": 1.6434753994172973, "learning_rate": 3.1903567437312388e-06, "loss": 0.1044, "step": 6916 }, { "epoch": 1.89, "grad_norm": 1.3472447225523396, "learning_rate": 3.188983048598201e-06, "loss": 0.0801, "step": 6917 }, { "epoch": 1.89, "grad_norm": 1.9790042246075576, "learning_rate": 3.187609510787116e-06, "loss": 0.131, "step": 6918 }, { "epoch": 1.89, "grad_norm": 1.5585054547457806, "learning_rate": 3.186236130417306e-06, "loss": 0.0969, "step": 6919 }, { "epoch": 1.89, "grad_norm": 1.8272425438772226, "learning_rate": 3.184862907608072e-06, "loss": 0.115, "step": 6920 }, { "epoch": 1.89, "grad_norm": 1.8906317645910922, "learning_rate": 3.1834898424787073e-06, "loss": 0.1178, "step": 6921 }, { "epoch": 1.89, "grad_norm": 1.7885853006471713, "learning_rate": 3.1821169351484884e-06, "loss": 0.1155, "step": 6922 }, { "epoch": 1.89, "grad_norm": 1.716941106531984, "learning_rate": 3.1807441857366798e-06, "loss": 0.1152, "step": 6923 }, { "epoch": 1.89, "grad_norm": 1.6039718984157232, "learning_rate": 3.17937159436253e-06, "loss": 0.1123, "step": 6924 }, { "epoch": 1.89, "grad_norm": 1.4786552558677983, "learning_rate": 3.177999161145277e-06, "loss": 0.0856, "step": 6925 }, { "epoch": 1.89, "grad_norm": 1.7379766483963266, "learning_rate": 3.1766268862041406e-06, "loss": 0.0993, "step": 6926 }, { "epoch": 1.89, "grad_norm": 1.546009770461926, "learning_rate": 3.1752547696583323e-06, "loss": 0.0911, "step": 6927 }, { "epoch": 1.89, "grad_norm": 1.6973855445111465, "learning_rate": 3.1738828116270447e-06, "loss": 0.1064, "step": 6928 }, { "epoch": 1.89, "grad_norm": 1.6708884730166054, "learning_rate": 3.1725110122294615e-06, "loss": 0.1006, "step": 6929 }, { "epoch": 1.89, "grad_norm": 1.7379159314712331, "learning_rate": 3.1711393715847477e-06, "loss": 0.1173, "step": 6930 }, { "epoch": 1.89, "grad_norm": 1.704621509150292, "learning_rate": 3.1697678898120585e-06, "loss": 0.1124, "step": 6931 }, { "epoch": 1.89, "grad_norm": 1.5838012680969749, "learning_rate": 3.1683965670305317e-06, "loss": 0.098, "step": 6932 }, { "epoch": 1.89, "grad_norm": 1.7252879524218507, "learning_rate": 3.167025403359297e-06, "loss": 0.1219, "step": 6933 }, { "epoch": 1.89, "grad_norm": 1.681272206366005, "learning_rate": 3.1656543989174625e-06, "loss": 0.0988, "step": 6934 }, { "epoch": 1.89, "grad_norm": 1.748537925662701, "learning_rate": 3.164283553824129e-06, "loss": 0.0972, "step": 6935 }, { "epoch": 1.89, "grad_norm": 1.8193882482632355, "learning_rate": 3.16291286819838e-06, "loss": 0.1029, "step": 6936 }, { "epoch": 1.89, "grad_norm": 1.8323696417451965, "learning_rate": 3.1615423421592873e-06, "loss": 0.1029, "step": 6937 }, { "epoch": 1.89, "grad_norm": 1.9206713347219417, "learning_rate": 3.160171975825904e-06, "loss": 0.1165, "step": 6938 }, { "epoch": 1.89, "grad_norm": 1.7745707253327103, "learning_rate": 3.158801769317279e-06, "loss": 0.1055, "step": 6939 }, { "epoch": 1.89, "grad_norm": 1.5522016622902524, "learning_rate": 3.157431722752436e-06, "loss": 0.0903, "step": 6940 }, { "epoch": 1.89, "grad_norm": 1.8047051109478471, "learning_rate": 3.1560618362503937e-06, "loss": 0.1091, "step": 6941 }, { "epoch": 1.9, "grad_norm": 1.7826666259666064, "learning_rate": 3.1546921099301507e-06, "loss": 0.1015, "step": 6942 }, { "epoch": 1.9, "grad_norm": 1.612127477196659, "learning_rate": 3.1533225439106965e-06, "loss": 0.0938, "step": 6943 }, { "epoch": 1.9, "grad_norm": 1.5780096930072713, "learning_rate": 3.1519531383110014e-06, "loss": 0.1064, "step": 6944 }, { "epoch": 1.9, "grad_norm": 1.501936803215791, "learning_rate": 3.1505838932500287e-06, "loss": 0.1029, "step": 6945 }, { "epoch": 1.9, "grad_norm": 1.6618034266569448, "learning_rate": 3.149214808846721e-06, "loss": 0.1131, "step": 6946 }, { "epoch": 1.9, "grad_norm": 1.5867358281510369, "learning_rate": 3.1478458852200122e-06, "loss": 0.0907, "step": 6947 }, { "epoch": 1.9, "grad_norm": 1.5536016734892864, "learning_rate": 3.1464771224888173e-06, "loss": 0.1054, "step": 6948 }, { "epoch": 1.9, "grad_norm": 1.6548856868193258, "learning_rate": 3.1451085207720423e-06, "loss": 0.1118, "step": 6949 }, { "epoch": 1.9, "grad_norm": 1.9005836277447175, "learning_rate": 3.143740080188574e-06, "loss": 0.1161, "step": 6950 }, { "epoch": 1.9, "grad_norm": 1.6199172476610106, "learning_rate": 3.1423718008572913e-06, "loss": 0.1017, "step": 6951 }, { "epoch": 1.9, "grad_norm": 1.6319113502795275, "learning_rate": 3.1410036828970525e-06, "loss": 0.108, "step": 6952 }, { "epoch": 1.9, "grad_norm": 1.7871738552739331, "learning_rate": 3.1396357264267087e-06, "loss": 0.1041, "step": 6953 }, { "epoch": 1.9, "grad_norm": 1.880436779032392, "learning_rate": 3.1382679315650903e-06, "loss": 0.118, "step": 6954 }, { "epoch": 1.9, "grad_norm": 1.7066230896831414, "learning_rate": 3.136900298431019e-06, "loss": 0.0996, "step": 6955 }, { "epoch": 1.9, "grad_norm": 1.610475184087908, "learning_rate": 3.135532827143298e-06, "loss": 0.0936, "step": 6956 }, { "epoch": 1.9, "grad_norm": 1.735675363617396, "learning_rate": 3.134165517820722e-06, "loss": 0.0978, "step": 6957 }, { "epoch": 1.9, "grad_norm": 1.6280006872336938, "learning_rate": 3.132798370582065e-06, "loss": 0.1054, "step": 6958 }, { "epoch": 1.9, "grad_norm": 1.6754636255346593, "learning_rate": 3.131431385546093e-06, "loss": 0.1134, "step": 6959 }, { "epoch": 1.9, "grad_norm": 1.5471663158482993, "learning_rate": 3.130064562831553e-06, "loss": 0.0968, "step": 6960 }, { "epoch": 1.9, "grad_norm": 1.7074249087913036, "learning_rate": 3.1286979025571817e-06, "loss": 0.1061, "step": 6961 }, { "epoch": 1.9, "grad_norm": 1.627628144743767, "learning_rate": 3.1273314048416967e-06, "loss": 0.1088, "step": 6962 }, { "epoch": 1.9, "grad_norm": 1.6451333996415225, "learning_rate": 3.1259650698038106e-06, "loss": 0.1019, "step": 6963 }, { "epoch": 1.9, "grad_norm": 1.6130077271533523, "learning_rate": 3.1245988975622116e-06, "loss": 0.1086, "step": 6964 }, { "epoch": 1.9, "grad_norm": 1.733701179048697, "learning_rate": 3.12323288823558e-06, "loss": 0.1098, "step": 6965 }, { "epoch": 1.9, "grad_norm": 1.596404975205049, "learning_rate": 3.1218670419425794e-06, "loss": 0.095, "step": 6966 }, { "epoch": 1.9, "grad_norm": 1.8598970436389741, "learning_rate": 3.1205013588018616e-06, "loss": 0.1049, "step": 6967 }, { "epoch": 1.9, "grad_norm": 1.4870449403527588, "learning_rate": 3.119135838932059e-06, "loss": 0.0713, "step": 6968 }, { "epoch": 1.9, "grad_norm": 1.6233278555117967, "learning_rate": 3.1177704824517984e-06, "loss": 0.0971, "step": 6969 }, { "epoch": 1.9, "grad_norm": 1.3149177490753887, "learning_rate": 3.1164052894796836e-06, "loss": 0.0813, "step": 6970 }, { "epoch": 1.9, "grad_norm": 1.8082337005439453, "learning_rate": 3.1150402601343116e-06, "loss": 0.106, "step": 6971 }, { "epoch": 1.9, "grad_norm": 1.6447516039693149, "learning_rate": 3.113675394534258e-06, "loss": 0.0952, "step": 6972 }, { "epoch": 1.9, "grad_norm": 1.8057585759112431, "learning_rate": 3.1123106927980906e-06, "loss": 0.1069, "step": 6973 }, { "epoch": 1.9, "grad_norm": 1.8155349133272556, "learning_rate": 3.1109461550443574e-06, "loss": 0.1072, "step": 6974 }, { "epoch": 1.9, "grad_norm": 1.8465096949739028, "learning_rate": 3.1095817813915983e-06, "loss": 0.1133, "step": 6975 }, { "epoch": 1.9, "grad_norm": 1.7988461303064716, "learning_rate": 3.1082175719583336e-06, "loss": 0.1033, "step": 6976 }, { "epoch": 1.9, "grad_norm": 1.4299173057939911, "learning_rate": 3.106853526863073e-06, "loss": 0.0855, "step": 6977 }, { "epoch": 1.9, "grad_norm": 1.6435344335906938, "learning_rate": 3.105489646224307e-06, "loss": 0.1043, "step": 6978 }, { "epoch": 1.91, "grad_norm": 1.6678930535227892, "learning_rate": 3.1041259301605194e-06, "loss": 0.0895, "step": 6979 }, { "epoch": 1.91, "grad_norm": 1.652446888045159, "learning_rate": 3.1027623787901706e-06, "loss": 0.1038, "step": 6980 }, { "epoch": 1.91, "grad_norm": 1.4263905734156785, "learning_rate": 3.1013989922317154e-06, "loss": 0.0937, "step": 6981 }, { "epoch": 1.91, "grad_norm": 1.6349801745918973, "learning_rate": 3.100035770603589e-06, "loss": 0.1116, "step": 6982 }, { "epoch": 1.91, "grad_norm": 1.7871167154992145, "learning_rate": 3.0986727140242145e-06, "loss": 0.1129, "step": 6983 }, { "epoch": 1.91, "grad_norm": 1.8049718839766495, "learning_rate": 3.097309822611998e-06, "loss": 0.1121, "step": 6984 }, { "epoch": 1.91, "grad_norm": 1.6548684562502816, "learning_rate": 3.095947096485335e-06, "loss": 0.1017, "step": 6985 }, { "epoch": 1.91, "grad_norm": 1.576807029105432, "learning_rate": 3.0945845357626014e-06, "loss": 0.0873, "step": 6986 }, { "epoch": 1.91, "grad_norm": 1.6137110597195599, "learning_rate": 3.093222140562167e-06, "loss": 0.1059, "step": 6987 }, { "epoch": 1.91, "grad_norm": 1.6082029287492177, "learning_rate": 3.0918599110023784e-06, "loss": 0.1051, "step": 6988 }, { "epoch": 1.91, "grad_norm": 1.6739381436304963, "learning_rate": 3.090497847201574e-06, "loss": 0.0994, "step": 6989 }, { "epoch": 1.91, "grad_norm": 1.8879407364067824, "learning_rate": 3.0891359492780734e-06, "loss": 0.1151, "step": 6990 }, { "epoch": 1.91, "grad_norm": 1.6382514402051553, "learning_rate": 3.0877742173501857e-06, "loss": 0.1095, "step": 6991 }, { "epoch": 1.91, "grad_norm": 1.361987093540842, "learning_rate": 3.0864126515362003e-06, "loss": 0.0796, "step": 6992 }, { "epoch": 1.91, "grad_norm": 1.7451611067461479, "learning_rate": 3.0850512519544005e-06, "loss": 0.1032, "step": 6993 }, { "epoch": 1.91, "grad_norm": 1.5852254759250786, "learning_rate": 3.0836900187230475e-06, "loss": 0.0919, "step": 6994 }, { "epoch": 1.91, "grad_norm": 1.5321003892666154, "learning_rate": 3.0823289519603916e-06, "loss": 0.0875, "step": 6995 }, { "epoch": 1.91, "grad_norm": 1.6254591943080208, "learning_rate": 3.0809680517846664e-06, "loss": 0.0991, "step": 6996 }, { "epoch": 1.91, "grad_norm": 1.5992944150003598, "learning_rate": 3.0796073183140953e-06, "loss": 0.1016, "step": 6997 }, { "epoch": 1.91, "grad_norm": 1.6096473091359744, "learning_rate": 3.07824675166688e-06, "loss": 0.0977, "step": 6998 }, { "epoch": 1.91, "grad_norm": 1.6111611160954276, "learning_rate": 3.076886351961217e-06, "loss": 0.111, "step": 6999 }, { "epoch": 1.91, "grad_norm": 1.5514590117798215, "learning_rate": 3.0755261193152797e-06, "loss": 0.0786, "step": 7000 }, { "epoch": 1.91, "grad_norm": 1.6714633802157248, "learning_rate": 3.074166053847234e-06, "loss": 0.1142, "step": 7001 }, { "epoch": 1.91, "grad_norm": 1.8388493808640693, "learning_rate": 3.0728061556752246e-06, "loss": 0.1145, "step": 7002 }, { "epoch": 1.91, "grad_norm": 1.3953102600993432, "learning_rate": 3.071446424917388e-06, "loss": 0.0785, "step": 7003 }, { "epoch": 1.91, "grad_norm": 1.9322247883564228, "learning_rate": 3.070086861691839e-06, "loss": 0.1084, "step": 7004 }, { "epoch": 1.91, "grad_norm": 1.4723004556035557, "learning_rate": 3.0687274661166867e-06, "loss": 0.0839, "step": 7005 }, { "epoch": 1.91, "grad_norm": 1.8213631652597726, "learning_rate": 3.0673682383100194e-06, "loss": 0.1074, "step": 7006 }, { "epoch": 1.91, "grad_norm": 1.5734232397531398, "learning_rate": 3.0660091783899117e-06, "loss": 0.0958, "step": 7007 }, { "epoch": 1.91, "grad_norm": 1.8841931408607158, "learning_rate": 3.064650286474425e-06, "loss": 0.1149, "step": 7008 }, { "epoch": 1.91, "grad_norm": 1.4887384915213429, "learning_rate": 3.063291562681604e-06, "loss": 0.0843, "step": 7009 }, { "epoch": 1.91, "grad_norm": 1.5701159055833478, "learning_rate": 3.061933007129483e-06, "loss": 0.0986, "step": 7010 }, { "epoch": 1.91, "grad_norm": 1.484992609880974, "learning_rate": 3.0605746199360755e-06, "loss": 0.092, "step": 7011 }, { "epoch": 1.91, "grad_norm": 1.7772399858693864, "learning_rate": 3.059216401219387e-06, "loss": 0.1096, "step": 7012 }, { "epoch": 1.91, "grad_norm": 1.7899516216797535, "learning_rate": 3.0578583510974035e-06, "loss": 0.1022, "step": 7013 }, { "epoch": 1.91, "grad_norm": 1.7679347349770604, "learning_rate": 3.0565004696880984e-06, "loss": 0.1153, "step": 7014 }, { "epoch": 1.92, "grad_norm": 1.8008798141824234, "learning_rate": 3.055142757109428e-06, "loss": 0.1065, "step": 7015 }, { "epoch": 1.92, "grad_norm": 1.941361596932643, "learning_rate": 3.0537852134793393e-06, "loss": 0.1127, "step": 7016 }, { "epoch": 1.92, "grad_norm": 1.6060316351705297, "learning_rate": 3.0524278389157593e-06, "loss": 0.1009, "step": 7017 }, { "epoch": 1.92, "grad_norm": 1.6427162540464562, "learning_rate": 3.0510706335366034e-06, "loss": 0.1166, "step": 7018 }, { "epoch": 1.92, "grad_norm": 1.6775323250167518, "learning_rate": 3.04971359745977e-06, "loss": 0.0978, "step": 7019 }, { "epoch": 1.92, "grad_norm": 1.4108792143245346, "learning_rate": 3.048356730803146e-06, "loss": 0.0829, "step": 7020 }, { "epoch": 1.92, "grad_norm": 1.437339266982179, "learning_rate": 3.0470000336845977e-06, "loss": 0.0895, "step": 7021 }, { "epoch": 1.92, "grad_norm": 1.7229092485652902, "learning_rate": 3.045643506221985e-06, "loss": 0.1103, "step": 7022 }, { "epoch": 1.92, "grad_norm": 1.8188157080555727, "learning_rate": 3.044287148533146e-06, "loss": 0.1102, "step": 7023 }, { "epoch": 1.92, "grad_norm": 1.6249059374701658, "learning_rate": 3.0429309607359088e-06, "loss": 0.1038, "step": 7024 }, { "epoch": 1.92, "grad_norm": 1.295843262473041, "learning_rate": 3.041574942948081e-06, "loss": 0.0747, "step": 7025 }, { "epoch": 1.92, "grad_norm": 1.9370610366960002, "learning_rate": 3.040219095287463e-06, "loss": 0.1318, "step": 7026 }, { "epoch": 1.92, "grad_norm": 1.750306593491175, "learning_rate": 3.0388634178718336e-06, "loss": 0.0983, "step": 7027 }, { "epoch": 1.92, "grad_norm": 1.5656530025081041, "learning_rate": 3.0375079108189613e-06, "loss": 0.105, "step": 7028 }, { "epoch": 1.92, "grad_norm": 1.4161619701991173, "learning_rate": 3.0361525742465975e-06, "loss": 0.0781, "step": 7029 }, { "epoch": 1.92, "grad_norm": 1.5462961931064405, "learning_rate": 3.034797408272481e-06, "loss": 0.0941, "step": 7030 }, { "epoch": 1.92, "grad_norm": 1.7048849935411121, "learning_rate": 3.033442413014331e-06, "loss": 0.0997, "step": 7031 }, { "epoch": 1.92, "grad_norm": 1.7572741191390129, "learning_rate": 3.032087588589858e-06, "loss": 0.1108, "step": 7032 }, { "epoch": 1.92, "grad_norm": 1.5407038212087154, "learning_rate": 3.0307329351167527e-06, "loss": 0.0873, "step": 7033 }, { "epoch": 1.92, "grad_norm": 1.6744575782969193, "learning_rate": 3.0293784527126956e-06, "loss": 0.1089, "step": 7034 }, { "epoch": 1.92, "grad_norm": 1.6388947149107616, "learning_rate": 3.0280241414953477e-06, "loss": 0.1002, "step": 7035 }, { "epoch": 1.92, "grad_norm": 1.5309349531814886, "learning_rate": 3.0266700015823585e-06, "loss": 0.0954, "step": 7036 }, { "epoch": 1.92, "grad_norm": 1.3950952540378945, "learning_rate": 3.02531603309136e-06, "loss": 0.0791, "step": 7037 }, { "epoch": 1.92, "grad_norm": 1.566810636419363, "learning_rate": 3.023962236139972e-06, "loss": 0.0977, "step": 7038 }, { "epoch": 1.92, "grad_norm": 1.7232781221321731, "learning_rate": 3.022608610845795e-06, "loss": 0.1103, "step": 7039 }, { "epoch": 1.92, "grad_norm": 1.6029285729937697, "learning_rate": 3.0212551573264224e-06, "loss": 0.0915, "step": 7040 }, { "epoch": 1.92, "grad_norm": 1.8620922428555597, "learning_rate": 3.0199018756994245e-06, "loss": 0.1229, "step": 7041 }, { "epoch": 1.92, "grad_norm": 1.7063409879938742, "learning_rate": 3.018548766082362e-06, "loss": 0.1025, "step": 7042 }, { "epoch": 1.92, "grad_norm": 1.7495428540027698, "learning_rate": 3.017195828592777e-06, "loss": 0.0949, "step": 7043 }, { "epoch": 1.92, "grad_norm": 1.8628941034322832, "learning_rate": 3.0158430633481996e-06, "loss": 0.0888, "step": 7044 }, { "epoch": 1.92, "grad_norm": 1.9278426676759393, "learning_rate": 3.0144904704661413e-06, "loss": 0.126, "step": 7045 }, { "epoch": 1.92, "grad_norm": 1.7139311996866518, "learning_rate": 3.013138050064105e-06, "loss": 0.1021, "step": 7046 }, { "epoch": 1.92, "grad_norm": 1.7922380336956099, "learning_rate": 3.011785802259571e-06, "loss": 0.0959, "step": 7047 }, { "epoch": 1.92, "grad_norm": 1.779174011549657, "learning_rate": 3.0104337271700114e-06, "loss": 0.0989, "step": 7048 }, { "epoch": 1.92, "grad_norm": 1.6547499421196536, "learning_rate": 3.0090818249128773e-06, "loss": 0.0992, "step": 7049 }, { "epoch": 1.92, "grad_norm": 1.579963859393145, "learning_rate": 3.00773009560561e-06, "loss": 0.0955, "step": 7050 }, { "epoch": 1.92, "grad_norm": 1.8738536023269088, "learning_rate": 3.006378539365631e-06, "loss": 0.1286, "step": 7051 }, { "epoch": 1.93, "grad_norm": 1.6629894998857333, "learning_rate": 3.005027156310352e-06, "loss": 0.1054, "step": 7052 }, { "epoch": 1.93, "grad_norm": 1.7133419134919652, "learning_rate": 3.0036759465571634e-06, "loss": 0.1188, "step": 7053 }, { "epoch": 1.93, "grad_norm": 1.7280722086986222, "learning_rate": 3.0023249102234477e-06, "loss": 0.1086, "step": 7054 }, { "epoch": 1.93, "grad_norm": 1.5332679865794903, "learning_rate": 3.000974047426566e-06, "loss": 0.0869, "step": 7055 }, { "epoch": 1.93, "grad_norm": 2.0626182737333623, "learning_rate": 2.9996233582838686e-06, "loss": 0.1231, "step": 7056 }, { "epoch": 1.93, "grad_norm": 1.74456585778073, "learning_rate": 2.998272842912686e-06, "loss": 0.0893, "step": 7057 }, { "epoch": 1.93, "grad_norm": 1.6054574997147781, "learning_rate": 2.996922501430341e-06, "loss": 0.1025, "step": 7058 }, { "epoch": 1.93, "grad_norm": 1.4603425254002318, "learning_rate": 2.9955723339541336e-06, "loss": 0.0912, "step": 7059 }, { "epoch": 1.93, "grad_norm": 1.8128231524086977, "learning_rate": 2.994222340601355e-06, "loss": 0.1205, "step": 7060 }, { "epoch": 1.93, "grad_norm": 1.4363029164280554, "learning_rate": 2.992872521489275e-06, "loss": 0.0884, "step": 7061 }, { "epoch": 1.93, "grad_norm": 1.6674638649933378, "learning_rate": 2.991522876735154e-06, "loss": 0.1093, "step": 7062 }, { "epoch": 1.93, "grad_norm": 1.788910511454472, "learning_rate": 2.9901734064562328e-06, "loss": 0.109, "step": 7063 }, { "epoch": 1.93, "grad_norm": 1.5631003909681371, "learning_rate": 2.9888241107697413e-06, "loss": 0.0992, "step": 7064 }, { "epoch": 1.93, "grad_norm": 1.7520997242192788, "learning_rate": 2.98747498979289e-06, "loss": 0.1156, "step": 7065 }, { "epoch": 1.93, "grad_norm": 1.5628607948013824, "learning_rate": 2.9861260436428783e-06, "loss": 0.1003, "step": 7066 }, { "epoch": 1.93, "grad_norm": 1.6715099728768117, "learning_rate": 2.984777272436887e-06, "loss": 0.1044, "step": 7067 }, { "epoch": 1.93, "grad_norm": 1.8790883466645505, "learning_rate": 2.983428676292084e-06, "loss": 0.1465, "step": 7068 }, { "epoch": 1.93, "grad_norm": 1.716192938107207, "learning_rate": 2.982080255325618e-06, "loss": 0.11, "step": 7069 }, { "epoch": 1.93, "grad_norm": 1.606108637550764, "learning_rate": 2.98073200965463e-06, "loss": 0.1026, "step": 7070 }, { "epoch": 1.93, "grad_norm": 1.8387548041110526, "learning_rate": 2.9793839393962374e-06, "loss": 0.1023, "step": 7071 }, { "epoch": 1.93, "grad_norm": 1.9010902473341666, "learning_rate": 2.978036044667549e-06, "loss": 0.1223, "step": 7072 }, { "epoch": 1.93, "grad_norm": 1.5654136815738038, "learning_rate": 2.976688325585655e-06, "loss": 0.1047, "step": 7073 }, { "epoch": 1.93, "grad_norm": 1.4847395671122539, "learning_rate": 2.9753407822676307e-06, "loss": 0.0997, "step": 7074 }, { "epoch": 1.93, "grad_norm": 1.3726704549669104, "learning_rate": 2.973993414830534e-06, "loss": 0.0786, "step": 7075 }, { "epoch": 1.93, "grad_norm": 1.6516326915108621, "learning_rate": 2.9726462233914146e-06, "loss": 0.1045, "step": 7076 }, { "epoch": 1.93, "grad_norm": 1.8592349258282432, "learning_rate": 2.971299208067298e-06, "loss": 0.1204, "step": 7077 }, { "epoch": 1.93, "grad_norm": 1.6906990805707725, "learning_rate": 2.9699523689752017e-06, "loss": 0.1036, "step": 7078 }, { "epoch": 1.93, "grad_norm": 1.744519183401334, "learning_rate": 2.9686057062321226e-06, "loss": 0.0861, "step": 7079 }, { "epoch": 1.93, "grad_norm": 1.5018210137823347, "learning_rate": 2.9672592199550465e-06, "loss": 0.081, "step": 7080 }, { "epoch": 1.93, "grad_norm": 1.4990104102537358, "learning_rate": 2.965912910260938e-06, "loss": 0.1013, "step": 7081 }, { "epoch": 1.93, "grad_norm": 1.8526982602162374, "learning_rate": 2.9645667772667553e-06, "loss": 0.109, "step": 7082 }, { "epoch": 1.93, "grad_norm": 1.7620767486072342, "learning_rate": 2.9632208210894326e-06, "loss": 0.1068, "step": 7083 }, { "epoch": 1.93, "grad_norm": 1.7072611840119034, "learning_rate": 2.961875041845894e-06, "loss": 0.1022, "step": 7084 }, { "epoch": 1.93, "grad_norm": 1.7262828298989001, "learning_rate": 2.960529439653046e-06, "loss": 0.1224, "step": 7085 }, { "epoch": 1.93, "grad_norm": 1.665912974284882, "learning_rate": 2.959184014627781e-06, "loss": 0.1084, "step": 7086 }, { "epoch": 1.93, "grad_norm": 1.5474788104025523, "learning_rate": 2.957838766886972e-06, "loss": 0.0949, "step": 7087 }, { "epoch": 1.94, "grad_norm": 1.5689255610722481, "learning_rate": 2.9564936965474844e-06, "loss": 0.1013, "step": 7088 }, { "epoch": 1.94, "grad_norm": 1.9302090817034776, "learning_rate": 2.955148803726161e-06, "loss": 0.1243, "step": 7089 }, { "epoch": 1.94, "grad_norm": 1.5052761051096597, "learning_rate": 2.953804088539833e-06, "loss": 0.0784, "step": 7090 }, { "epoch": 1.94, "grad_norm": 1.6331099651091168, "learning_rate": 2.9524595511053137e-06, "loss": 0.1098, "step": 7091 }, { "epoch": 1.94, "grad_norm": 1.8628799541688463, "learning_rate": 2.9511151915394043e-06, "loss": 0.1067, "step": 7092 }, { "epoch": 1.94, "grad_norm": 1.6886222147935586, "learning_rate": 2.949771009958885e-06, "loss": 0.1144, "step": 7093 }, { "epoch": 1.94, "grad_norm": 1.570771081163554, "learning_rate": 2.948427006480528e-06, "loss": 0.0888, "step": 7094 }, { "epoch": 1.94, "grad_norm": 1.9764596395837009, "learning_rate": 2.9470831812210836e-06, "loss": 0.11, "step": 7095 }, { "epoch": 1.94, "grad_norm": 1.979109919147417, "learning_rate": 2.9457395342972904e-06, "loss": 0.1233, "step": 7096 }, { "epoch": 1.94, "grad_norm": 1.5829595729326278, "learning_rate": 2.9443960658258696e-06, "loss": 0.1024, "step": 7097 }, { "epoch": 1.94, "grad_norm": 1.7584027764203016, "learning_rate": 2.943052775923526e-06, "loss": 0.1058, "step": 7098 }, { "epoch": 1.94, "grad_norm": 1.7852003215974552, "learning_rate": 2.9417096647069532e-06, "loss": 0.1092, "step": 7099 }, { "epoch": 1.94, "grad_norm": 1.5973069884742903, "learning_rate": 2.9403667322928255e-06, "loss": 0.0999, "step": 7100 }, { "epoch": 1.94, "grad_norm": 1.715281742152762, "learning_rate": 2.9390239787978026e-06, "loss": 0.1039, "step": 7101 }, { "epoch": 1.94, "grad_norm": 1.6556594209082036, "learning_rate": 2.937681404338527e-06, "loss": 0.0947, "step": 7102 }, { "epoch": 1.94, "grad_norm": 1.7588433082327835, "learning_rate": 2.93633900903163e-06, "loss": 0.0865, "step": 7103 }, { "epoch": 1.94, "grad_norm": 1.6301660289070452, "learning_rate": 2.9349967929937218e-06, "loss": 0.0983, "step": 7104 }, { "epoch": 1.94, "grad_norm": 1.5966796797292506, "learning_rate": 2.9336547563414036e-06, "loss": 0.1049, "step": 7105 }, { "epoch": 1.94, "grad_norm": 1.738022142401436, "learning_rate": 2.9323128991912543e-06, "loss": 0.1095, "step": 7106 }, { "epoch": 1.94, "grad_norm": 2.0028512616026064, "learning_rate": 2.9309712216598413e-06, "loss": 0.1318, "step": 7107 }, { "epoch": 1.94, "grad_norm": 1.6712006670282848, "learning_rate": 2.929629723863715e-06, "loss": 0.0933, "step": 7108 }, { "epoch": 1.94, "grad_norm": 1.6122640228582024, "learning_rate": 2.9282884059194112e-06, "loss": 0.0932, "step": 7109 }, { "epoch": 1.94, "grad_norm": 1.7899905360257955, "learning_rate": 2.926947267943447e-06, "loss": 0.1103, "step": 7110 }, { "epoch": 1.94, "grad_norm": 1.4885080427170663, "learning_rate": 2.9256063100523303e-06, "loss": 0.0865, "step": 7111 }, { "epoch": 1.94, "grad_norm": 1.8351737586147492, "learning_rate": 2.9242655323625458e-06, "loss": 0.1189, "step": 7112 }, { "epoch": 1.94, "grad_norm": 1.5013271185402737, "learning_rate": 2.9229249349905686e-06, "loss": 0.0944, "step": 7113 }, { "epoch": 1.94, "grad_norm": 1.6504760832684162, "learning_rate": 2.9215845180528537e-06, "loss": 0.1041, "step": 7114 }, { "epoch": 1.94, "grad_norm": 1.6096959939428497, "learning_rate": 2.9202442816658433e-06, "loss": 0.1038, "step": 7115 }, { "epoch": 1.94, "grad_norm": 2.0228252946783805, "learning_rate": 2.918904225945961e-06, "loss": 0.1232, "step": 7116 }, { "epoch": 1.94, "grad_norm": 1.4798887516881394, "learning_rate": 2.9175643510096195e-06, "loss": 0.0807, "step": 7117 }, { "epoch": 1.94, "grad_norm": 1.6991095850444975, "learning_rate": 2.916224656973211e-06, "loss": 0.1014, "step": 7118 }, { "epoch": 1.94, "grad_norm": 1.8736220977574374, "learning_rate": 2.9148851439531177e-06, "loss": 0.1295, "step": 7119 }, { "epoch": 1.94, "grad_norm": 1.4613378710563603, "learning_rate": 2.9135458120656958e-06, "loss": 0.0948, "step": 7120 }, { "epoch": 1.94, "grad_norm": 1.632854429077691, "learning_rate": 2.912206661427297e-06, "loss": 0.1244, "step": 7121 }, { "epoch": 1.94, "grad_norm": 1.6188542185006656, "learning_rate": 2.910867692154249e-06, "loss": 0.1047, "step": 7122 }, { "epoch": 1.94, "grad_norm": 1.5997744629828488, "learning_rate": 2.909528904362872e-06, "loss": 0.1055, "step": 7123 }, { "epoch": 1.94, "grad_norm": 1.4820527828936578, "learning_rate": 2.908190298169461e-06, "loss": 0.0981, "step": 7124 }, { "epoch": 1.95, "grad_norm": 1.7883341284731695, "learning_rate": 2.9068518736903063e-06, "loss": 0.1155, "step": 7125 }, { "epoch": 1.95, "grad_norm": 1.6594220733904497, "learning_rate": 2.9055136310416664e-06, "loss": 0.0894, "step": 7126 }, { "epoch": 1.95, "grad_norm": 1.846004590050599, "learning_rate": 2.9041755703398023e-06, "loss": 0.1177, "step": 7127 }, { "epoch": 1.95, "grad_norm": 1.5881793862083315, "learning_rate": 2.9028376917009448e-06, "loss": 0.0853, "step": 7128 }, { "epoch": 1.95, "grad_norm": 1.6827123383021485, "learning_rate": 2.901499995241319e-06, "loss": 0.099, "step": 7129 }, { "epoch": 1.95, "grad_norm": 1.5060885124736059, "learning_rate": 2.900162481077126e-06, "loss": 0.0907, "step": 7130 }, { "epoch": 1.95, "grad_norm": 1.825513601583001, "learning_rate": 2.8988251493245607e-06, "loss": 0.1062, "step": 7131 }, { "epoch": 1.95, "grad_norm": 1.7653753315030944, "learning_rate": 2.897488000099788e-06, "loss": 0.0973, "step": 7132 }, { "epoch": 1.95, "grad_norm": 1.8267783962810458, "learning_rate": 2.896151033518971e-06, "loss": 0.1162, "step": 7133 }, { "epoch": 1.95, "grad_norm": 1.5168075840055988, "learning_rate": 2.8948142496982488e-06, "loss": 0.0916, "step": 7134 }, { "epoch": 1.95, "grad_norm": 1.6094190110775615, "learning_rate": 2.8934776487537498e-06, "loss": 0.093, "step": 7135 }, { "epoch": 1.95, "grad_norm": 2.0060222531550997, "learning_rate": 2.8921412308015795e-06, "loss": 0.1365, "step": 7136 }, { "epoch": 1.95, "grad_norm": 1.8704228851427382, "learning_rate": 2.8908049959578375e-06, "loss": 0.1078, "step": 7137 }, { "epoch": 1.95, "grad_norm": 1.580241100998725, "learning_rate": 2.8894689443385947e-06, "loss": 0.0934, "step": 7138 }, { "epoch": 1.95, "grad_norm": 1.651453207452964, "learning_rate": 2.888133076059919e-06, "loss": 0.0977, "step": 7139 }, { "epoch": 1.95, "grad_norm": 1.6746311230271989, "learning_rate": 2.8867973912378524e-06, "loss": 0.1004, "step": 7140 }, { "epoch": 1.95, "grad_norm": 1.5175627637414244, "learning_rate": 2.885461889988428e-06, "loss": 0.0806, "step": 7141 }, { "epoch": 1.95, "grad_norm": 1.6337083980116929, "learning_rate": 2.8841265724276566e-06, "loss": 0.1178, "step": 7142 }, { "epoch": 1.95, "grad_norm": 1.556722073013614, "learning_rate": 2.882791438671543e-06, "loss": 0.0847, "step": 7143 }, { "epoch": 1.95, "grad_norm": 1.5952924466319007, "learning_rate": 2.8814564888360617e-06, "loss": 0.0885, "step": 7144 }, { "epoch": 1.95, "grad_norm": 1.5444463706038987, "learning_rate": 2.8801217230371838e-06, "loss": 0.1063, "step": 7145 }, { "epoch": 1.95, "grad_norm": 1.7075500760743183, "learning_rate": 2.8787871413908563e-06, "loss": 0.1012, "step": 7146 }, { "epoch": 1.95, "grad_norm": 1.523736793271295, "learning_rate": 2.8774527440130173e-06, "loss": 0.0831, "step": 7147 }, { "epoch": 1.95, "grad_norm": 1.6414465474566335, "learning_rate": 2.8761185310195803e-06, "loss": 0.0977, "step": 7148 }, { "epoch": 1.95, "grad_norm": 1.619888272049271, "learning_rate": 2.874784502526456e-06, "loss": 0.092, "step": 7149 }, { "epoch": 1.95, "grad_norm": 1.4825273685704115, "learning_rate": 2.87345065864952e-06, "loss": 0.0823, "step": 7150 }, { "epoch": 1.95, "grad_norm": 1.8710760853350006, "learning_rate": 2.8721169995046503e-06, "loss": 0.1217, "step": 7151 }, { "epoch": 1.95, "grad_norm": 1.688078830066312, "learning_rate": 2.8707835252076967e-06, "loss": 0.0943, "step": 7152 }, { "epoch": 1.95, "grad_norm": 1.5154091912650776, "learning_rate": 2.8694502358745003e-06, "loss": 0.079, "step": 7153 }, { "epoch": 1.95, "grad_norm": 1.6424958786202457, "learning_rate": 2.86811713162088e-06, "loss": 0.1062, "step": 7154 }, { "epoch": 1.95, "grad_norm": 1.4125838589757584, "learning_rate": 2.8667842125626473e-06, "loss": 0.08, "step": 7155 }, { "epoch": 1.95, "grad_norm": 1.7350123309603294, "learning_rate": 2.8654514788155846e-06, "loss": 0.0955, "step": 7156 }, { "epoch": 1.95, "grad_norm": 1.65850665524578, "learning_rate": 2.864118930495472e-06, "loss": 0.0964, "step": 7157 }, { "epoch": 1.95, "grad_norm": 1.6446333632298167, "learning_rate": 2.862786567718062e-06, "loss": 0.103, "step": 7158 }, { "epoch": 1.95, "grad_norm": 1.755229985312071, "learning_rate": 2.861454390599101e-06, "loss": 0.0988, "step": 7159 }, { "epoch": 1.95, "grad_norm": 1.5730082503018132, "learning_rate": 2.860122399254312e-06, "loss": 0.0919, "step": 7160 }, { "epoch": 1.95, "grad_norm": 1.5481810222978016, "learning_rate": 2.858790593799405e-06, "loss": 0.0892, "step": 7161 }, { "epoch": 1.96, "grad_norm": 1.6721221997241988, "learning_rate": 2.85745897435007e-06, "loss": 0.1024, "step": 7162 }, { "epoch": 1.96, "grad_norm": 1.582851270403692, "learning_rate": 2.856127541021989e-06, "loss": 0.0959, "step": 7163 }, { "epoch": 1.96, "grad_norm": 1.683197511584693, "learning_rate": 2.8547962939308187e-06, "loss": 0.0922, "step": 7164 }, { "epoch": 1.96, "grad_norm": 1.8433705924611326, "learning_rate": 2.8534652331922073e-06, "loss": 0.1227, "step": 7165 }, { "epoch": 1.96, "grad_norm": 2.090068189556892, "learning_rate": 2.8521343589217816e-06, "loss": 0.1083, "step": 7166 }, { "epoch": 1.96, "grad_norm": 1.9299027435630736, "learning_rate": 2.8508036712351535e-06, "loss": 0.1131, "step": 7167 }, { "epoch": 1.96, "grad_norm": 2.0628045657195453, "learning_rate": 2.849473170247917e-06, "loss": 0.1156, "step": 7168 }, { "epoch": 1.96, "grad_norm": 1.888024961377578, "learning_rate": 2.8481428560756565e-06, "loss": 0.117, "step": 7169 }, { "epoch": 1.96, "grad_norm": 1.971710023191058, "learning_rate": 2.846812728833931e-06, "loss": 0.129, "step": 7170 }, { "epoch": 1.96, "grad_norm": 1.797874835398455, "learning_rate": 2.8454827886382918e-06, "loss": 0.1106, "step": 7171 }, { "epoch": 1.96, "grad_norm": 1.4959055368596517, "learning_rate": 2.844153035604269e-06, "loss": 0.0928, "step": 7172 }, { "epoch": 1.96, "grad_norm": 1.560051113486972, "learning_rate": 2.842823469847376e-06, "loss": 0.1007, "step": 7173 }, { "epoch": 1.96, "grad_norm": 1.5667186296783124, "learning_rate": 2.841494091483111e-06, "loss": 0.0927, "step": 7174 }, { "epoch": 1.96, "grad_norm": 1.8206023033816052, "learning_rate": 2.840164900626958e-06, "loss": 0.1109, "step": 7175 }, { "epoch": 1.96, "grad_norm": 1.9050058938585528, "learning_rate": 2.838835897394382e-06, "loss": 0.1155, "step": 7176 }, { "epoch": 1.96, "grad_norm": 1.883249590174709, "learning_rate": 2.8375070819008345e-06, "loss": 0.1065, "step": 7177 }, { "epoch": 1.96, "grad_norm": 1.5069487818357925, "learning_rate": 2.8361784542617476e-06, "loss": 0.0978, "step": 7178 }, { "epoch": 1.96, "grad_norm": 1.9298931735142786, "learning_rate": 2.8348500145925384e-06, "loss": 0.1059, "step": 7179 }, { "epoch": 1.96, "grad_norm": 1.6508220445571002, "learning_rate": 2.8335217630086053e-06, "loss": 0.1064, "step": 7180 }, { "epoch": 1.96, "grad_norm": 1.716748732576026, "learning_rate": 2.8321936996253368e-06, "loss": 0.0998, "step": 7181 }, { "epoch": 1.96, "grad_norm": 1.6736281390205572, "learning_rate": 2.8308658245580977e-06, "loss": 0.1021, "step": 7182 }, { "epoch": 1.96, "grad_norm": 1.6536782061474171, "learning_rate": 2.829538137922243e-06, "loss": 0.0974, "step": 7183 }, { "epoch": 1.96, "grad_norm": 1.6557844422473402, "learning_rate": 2.828210639833106e-06, "loss": 0.0971, "step": 7184 }, { "epoch": 1.96, "grad_norm": 1.756145142490854, "learning_rate": 2.826883330406006e-06, "loss": 0.1074, "step": 7185 }, { "epoch": 1.96, "grad_norm": 1.6257806350083217, "learning_rate": 2.8255562097562437e-06, "loss": 0.1079, "step": 7186 }, { "epoch": 1.96, "grad_norm": 1.519961551973838, "learning_rate": 2.8242292779991086e-06, "loss": 0.0857, "step": 7187 }, { "epoch": 1.96, "grad_norm": 1.5920220831505345, "learning_rate": 2.822902535249867e-06, "loss": 0.1021, "step": 7188 }, { "epoch": 1.96, "grad_norm": 1.432716073255345, "learning_rate": 2.8215759816237748e-06, "loss": 0.0875, "step": 7189 }, { "epoch": 1.96, "grad_norm": 1.6203884156941701, "learning_rate": 2.8202496172360715e-06, "loss": 0.1061, "step": 7190 }, { "epoch": 1.96, "grad_norm": 1.8191959811143301, "learning_rate": 2.8189234422019707e-06, "loss": 0.1036, "step": 7191 }, { "epoch": 1.96, "grad_norm": 1.5326807414032029, "learning_rate": 2.817597456636682e-06, "loss": 0.0977, "step": 7192 }, { "epoch": 1.96, "grad_norm": 1.7563990423728753, "learning_rate": 2.8162716606553885e-06, "loss": 0.1068, "step": 7193 }, { "epoch": 1.96, "grad_norm": 1.6657378268546792, "learning_rate": 2.8149460543732666e-06, "loss": 0.0922, "step": 7194 }, { "epoch": 1.96, "grad_norm": 2.0817520235831894, "learning_rate": 2.8136206379054658e-06, "loss": 0.0836, "step": 7195 }, { "epoch": 1.96, "grad_norm": 1.7318692643651203, "learning_rate": 2.812295411367131e-06, "loss": 0.1101, "step": 7196 }, { "epoch": 1.96, "grad_norm": 1.490375277170295, "learning_rate": 2.8109703748733746e-06, "loss": 0.0998, "step": 7197 }, { "epoch": 1.97, "grad_norm": 1.4836005685472418, "learning_rate": 2.8096455285393094e-06, "loss": 0.0945, "step": 7198 }, { "epoch": 1.97, "grad_norm": 1.4480946662179084, "learning_rate": 2.808320872480018e-06, "loss": 0.0969, "step": 7199 }, { "epoch": 1.97, "grad_norm": 1.5673698210646434, "learning_rate": 2.8069964068105786e-06, "loss": 0.0918, "step": 7200 }, { "epoch": 1.97, "grad_norm": 1.7804432275168103, "learning_rate": 2.8056721316460417e-06, "loss": 0.1082, "step": 7201 }, { "epoch": 1.97, "grad_norm": 1.7593922536940547, "learning_rate": 2.8043480471014524e-06, "loss": 0.0974, "step": 7202 }, { "epoch": 1.97, "grad_norm": 1.9262990226737913, "learning_rate": 2.8030241532918244e-06, "loss": 0.1074, "step": 7203 }, { "epoch": 1.97, "grad_norm": 1.79953210518286, "learning_rate": 2.801700450332171e-06, "loss": 0.1072, "step": 7204 }, { "epoch": 1.97, "grad_norm": 1.4445661151242126, "learning_rate": 2.8003769383374765e-06, "loss": 0.0735, "step": 7205 }, { "epoch": 1.97, "grad_norm": 1.7606078172438218, "learning_rate": 2.7990536174227175e-06, "loss": 0.0959, "step": 7206 }, { "epoch": 1.97, "grad_norm": 1.713978977561146, "learning_rate": 2.7977304877028464e-06, "loss": 0.1072, "step": 7207 }, { "epoch": 1.97, "grad_norm": 1.689511396125973, "learning_rate": 2.796407549292809e-06, "loss": 0.101, "step": 7208 }, { "epoch": 1.97, "grad_norm": 2.113539642080664, "learning_rate": 2.7950848023075194e-06, "loss": 0.1263, "step": 7209 }, { "epoch": 1.97, "grad_norm": 1.587324320892791, "learning_rate": 2.7937622468618906e-06, "loss": 0.0868, "step": 7210 }, { "epoch": 1.97, "grad_norm": 1.861382463575405, "learning_rate": 2.792439883070808e-06, "loss": 0.0958, "step": 7211 }, { "epoch": 1.97, "grad_norm": 1.592869652685055, "learning_rate": 2.7911177110491485e-06, "loss": 0.0879, "step": 7212 }, { "epoch": 1.97, "grad_norm": 1.3316585634886697, "learning_rate": 2.789795730911764e-06, "loss": 0.0839, "step": 7213 }, { "epoch": 1.97, "grad_norm": 1.6097170946132777, "learning_rate": 2.788473942773501e-06, "loss": 0.0924, "step": 7214 }, { "epoch": 1.97, "grad_norm": 1.7245079775835004, "learning_rate": 2.787152346749173e-06, "loss": 0.1085, "step": 7215 }, { "epoch": 1.97, "grad_norm": 1.6176809503393725, "learning_rate": 2.7858309429535934e-06, "loss": 0.0971, "step": 7216 }, { "epoch": 1.97, "grad_norm": 1.6880686883653846, "learning_rate": 2.7845097315015477e-06, "loss": 0.112, "step": 7217 }, { "epoch": 1.97, "grad_norm": 1.5862474980487622, "learning_rate": 2.7831887125078128e-06, "loss": 0.1006, "step": 7218 }, { "epoch": 1.97, "grad_norm": 1.7483993647632274, "learning_rate": 2.7818678860871395e-06, "loss": 0.1192, "step": 7219 }, { "epoch": 1.97, "grad_norm": 1.556856555694627, "learning_rate": 2.7805472523542755e-06, "loss": 0.0862, "step": 7220 }, { "epoch": 1.97, "grad_norm": 1.5409671225193207, "learning_rate": 2.7792268114239336e-06, "loss": 0.089, "step": 7221 }, { "epoch": 1.97, "grad_norm": 1.4690074078029538, "learning_rate": 2.7779065634108265e-06, "loss": 0.0894, "step": 7222 }, { "epoch": 1.97, "grad_norm": 2.054361122286564, "learning_rate": 2.776586508429639e-06, "loss": 0.1127, "step": 7223 }, { "epoch": 1.97, "grad_norm": 1.7072596980160584, "learning_rate": 2.7752666465950477e-06, "loss": 0.1003, "step": 7224 }, { "epoch": 1.97, "grad_norm": 1.5843975643675292, "learning_rate": 2.773946978021704e-06, "loss": 0.113, "step": 7225 }, { "epoch": 1.97, "grad_norm": 1.6073929650853112, "learning_rate": 2.7726275028242532e-06, "loss": 0.0893, "step": 7226 }, { "epoch": 1.97, "grad_norm": 1.8179193342958866, "learning_rate": 2.771308221117309e-06, "loss": 0.1217, "step": 7227 }, { "epoch": 1.97, "grad_norm": 1.8956889407110802, "learning_rate": 2.7699891330154825e-06, "loss": 0.1259, "step": 7228 }, { "epoch": 1.97, "grad_norm": 1.460337773586239, "learning_rate": 2.7686702386333584e-06, "loss": 0.0934, "step": 7229 }, { "epoch": 1.97, "grad_norm": 1.6424100393049181, "learning_rate": 2.767351538085512e-06, "loss": 0.0964, "step": 7230 }, { "epoch": 1.97, "grad_norm": 1.6913486566528593, "learning_rate": 2.7660330314864937e-06, "loss": 0.0873, "step": 7231 }, { "epoch": 1.97, "grad_norm": 1.701191391234638, "learning_rate": 2.7647147189508485e-06, "loss": 0.1144, "step": 7232 }, { "epoch": 1.97, "grad_norm": 1.6999291881361889, "learning_rate": 2.763396600593088e-06, "loss": 0.1094, "step": 7233 }, { "epoch": 1.97, "grad_norm": 1.7532582731084572, "learning_rate": 2.762078676527723e-06, "loss": 0.1127, "step": 7234 }, { "epoch": 1.98, "grad_norm": 1.7298472905038729, "learning_rate": 2.760760946869237e-06, "loss": 0.1039, "step": 7235 }, { "epoch": 1.98, "grad_norm": 1.5900557445657748, "learning_rate": 2.7594434117321044e-06, "loss": 0.0987, "step": 7236 }, { "epoch": 1.98, "grad_norm": 1.4951148741114944, "learning_rate": 2.758126071230776e-06, "loss": 0.0864, "step": 7237 }, { "epoch": 1.98, "grad_norm": 1.5728734917060558, "learning_rate": 2.7568089254796893e-06, "loss": 0.0885, "step": 7238 }, { "epoch": 1.98, "grad_norm": 1.6464266458570087, "learning_rate": 2.755491974593261e-06, "loss": 0.092, "step": 7239 }, { "epoch": 1.98, "grad_norm": 1.5961460745524656, "learning_rate": 2.754175218685899e-06, "loss": 0.095, "step": 7240 }, { "epoch": 1.98, "grad_norm": 1.5216670022738257, "learning_rate": 2.752858657871984e-06, "loss": 0.0891, "step": 7241 }, { "epoch": 1.98, "grad_norm": 1.7064685439725689, "learning_rate": 2.7515422922658895e-06, "loss": 0.1132, "step": 7242 }, { "epoch": 1.98, "grad_norm": 1.6062990593945012, "learning_rate": 2.750226121981965e-06, "loss": 0.1035, "step": 7243 }, { "epoch": 1.98, "grad_norm": 1.3464676455187288, "learning_rate": 2.748910147134546e-06, "loss": 0.0757, "step": 7244 }, { "epoch": 1.98, "grad_norm": 1.8028790340029268, "learning_rate": 2.7475943678379474e-06, "loss": 0.1146, "step": 7245 }, { "epoch": 1.98, "grad_norm": 1.486442315788568, "learning_rate": 2.7462787842064753e-06, "loss": 0.0904, "step": 7246 }, { "epoch": 1.98, "grad_norm": 1.3316406424539249, "learning_rate": 2.7449633963544085e-06, "loss": 0.0765, "step": 7247 }, { "epoch": 1.98, "grad_norm": 1.6877050824710027, "learning_rate": 2.743648204396019e-06, "loss": 0.1097, "step": 7248 }, { "epoch": 1.98, "grad_norm": 1.4288566936345768, "learning_rate": 2.7423332084455543e-06, "loss": 0.0989, "step": 7249 }, { "epoch": 1.98, "grad_norm": 1.651835742041863, "learning_rate": 2.7410184086172477e-06, "loss": 0.1033, "step": 7250 }, { "epoch": 1.98, "grad_norm": 1.5573793818989963, "learning_rate": 2.7397038050253122e-06, "loss": 0.0933, "step": 7251 }, { "epoch": 1.98, "grad_norm": 1.6267708221021036, "learning_rate": 2.7383893977839513e-06, "loss": 0.1049, "step": 7252 }, { "epoch": 1.98, "grad_norm": 1.5069900939669867, "learning_rate": 2.7370751870073433e-06, "loss": 0.1052, "step": 7253 }, { "epoch": 1.98, "grad_norm": 1.67892337249066, "learning_rate": 2.7357611728096554e-06, "loss": 0.1056, "step": 7254 }, { "epoch": 1.98, "grad_norm": 1.6107600017331722, "learning_rate": 2.7344473553050343e-06, "loss": 0.0906, "step": 7255 }, { "epoch": 1.98, "grad_norm": 1.6241641450450923, "learning_rate": 2.7331337346076105e-06, "loss": 0.1056, "step": 7256 }, { "epoch": 1.98, "grad_norm": 1.785500657253143, "learning_rate": 2.7318203108314946e-06, "loss": 0.1091, "step": 7257 }, { "epoch": 1.98, "grad_norm": 1.54328979219966, "learning_rate": 2.7305070840907878e-06, "loss": 0.0999, "step": 7258 }, { "epoch": 1.98, "grad_norm": 1.7276050688772684, "learning_rate": 2.7291940544995655e-06, "loss": 0.096, "step": 7259 }, { "epoch": 1.98, "grad_norm": 1.6506816192588067, "learning_rate": 2.7278812221718927e-06, "loss": 0.1107, "step": 7260 }, { "epoch": 1.98, "grad_norm": 1.631150693250893, "learning_rate": 2.7265685872218133e-06, "loss": 0.1026, "step": 7261 }, { "epoch": 1.98, "grad_norm": 2.009749878469168, "learning_rate": 2.7252561497633546e-06, "loss": 0.1256, "step": 7262 }, { "epoch": 1.98, "grad_norm": 1.7850088002472462, "learning_rate": 2.723943909910526e-06, "loss": 0.1281, "step": 7263 }, { "epoch": 1.98, "grad_norm": 1.6519711705112885, "learning_rate": 2.7226318677773243e-06, "loss": 0.0953, "step": 7264 }, { "epoch": 1.98, "grad_norm": 1.7023963356303091, "learning_rate": 2.7213200234777215e-06, "loss": 0.1058, "step": 7265 }, { "epoch": 1.98, "grad_norm": 1.6359390387739592, "learning_rate": 2.720008377125682e-06, "loss": 0.1044, "step": 7266 }, { "epoch": 1.98, "grad_norm": 1.5748952679884474, "learning_rate": 2.7186969288351438e-06, "loss": 0.0974, "step": 7267 }, { "epoch": 1.98, "grad_norm": 1.5528687813587494, "learning_rate": 2.717385678720034e-06, "loss": 0.0956, "step": 7268 }, { "epoch": 1.98, "grad_norm": 1.552913459266542, "learning_rate": 2.716074626894256e-06, "loss": 0.085, "step": 7269 }, { "epoch": 1.98, "grad_norm": 1.6228902919900716, "learning_rate": 2.714763773471706e-06, "loss": 0.1017, "step": 7270 }, { "epoch": 1.98, "grad_norm": 1.7527929220348546, "learning_rate": 2.7134531185662503e-06, "loss": 0.1131, "step": 7271 }, { "epoch": 1.99, "grad_norm": 1.3411667009347046, "learning_rate": 2.712142662291752e-06, "loss": 0.0756, "step": 7272 }, { "epoch": 1.99, "grad_norm": 1.7774831166574165, "learning_rate": 2.710832404762045e-06, "loss": 0.1089, "step": 7273 }, { "epoch": 1.99, "grad_norm": 1.5581469711660068, "learning_rate": 2.7095223460909527e-06, "loss": 0.091, "step": 7274 }, { "epoch": 1.99, "grad_norm": 1.7317721214944743, "learning_rate": 2.7082124863922753e-06, "loss": 0.1099, "step": 7275 }, { "epoch": 1.99, "grad_norm": 1.3953330923989558, "learning_rate": 2.706902825779804e-06, "loss": 0.0862, "step": 7276 }, { "epoch": 1.99, "grad_norm": 1.5237344771688994, "learning_rate": 2.705593364367305e-06, "loss": 0.0965, "step": 7277 }, { "epoch": 1.99, "grad_norm": 1.8694709926866722, "learning_rate": 2.704284102268534e-06, "loss": 0.1121, "step": 7278 }, { "epoch": 1.99, "grad_norm": 1.7153002321741082, "learning_rate": 2.702975039597223e-06, "loss": 0.1029, "step": 7279 }, { "epoch": 1.99, "grad_norm": 1.6831223233200074, "learning_rate": 2.7016661764670917e-06, "loss": 0.0984, "step": 7280 }, { "epoch": 1.99, "grad_norm": 1.6794019385659853, "learning_rate": 2.700357512991836e-06, "loss": 0.1023, "step": 7281 }, { "epoch": 1.99, "grad_norm": 1.5072118646135964, "learning_rate": 2.6990490492851408e-06, "loss": 0.084, "step": 7282 }, { "epoch": 1.99, "grad_norm": 1.7878137781908692, "learning_rate": 2.697740785460675e-06, "loss": 0.1213, "step": 7283 }, { "epoch": 1.99, "grad_norm": 1.7805820895861189, "learning_rate": 2.696432721632082e-06, "loss": 0.1245, "step": 7284 }, { "epoch": 1.99, "grad_norm": 1.4809674169756688, "learning_rate": 2.695124857912998e-06, "loss": 0.0829, "step": 7285 }, { "epoch": 1.99, "grad_norm": 1.6097491242758988, "learning_rate": 2.693817194417029e-06, "loss": 0.0996, "step": 7286 }, { "epoch": 1.99, "grad_norm": 1.5983240073255753, "learning_rate": 2.6925097312577766e-06, "loss": 0.089, "step": 7287 }, { "epoch": 1.99, "grad_norm": 1.7929460376267201, "learning_rate": 2.6912024685488157e-06, "loss": 0.106, "step": 7288 }, { "epoch": 1.99, "grad_norm": 1.5870922776565113, "learning_rate": 2.6898954064037107e-06, "loss": 0.0813, "step": 7289 }, { "epoch": 1.99, "grad_norm": 1.7451435979680616, "learning_rate": 2.6885885449360027e-06, "loss": 0.1053, "step": 7290 }, { "epoch": 1.99, "grad_norm": 1.6579836738803748, "learning_rate": 2.687281884259223e-06, "loss": 0.103, "step": 7291 }, { "epoch": 1.99, "grad_norm": 1.7626971308146393, "learning_rate": 2.685975424486872e-06, "loss": 0.0952, "step": 7292 }, { "epoch": 1.99, "grad_norm": 1.610609072312392, "learning_rate": 2.6846691657324473e-06, "loss": 0.1025, "step": 7293 }, { "epoch": 1.99, "grad_norm": 1.5128506597178897, "learning_rate": 2.6833631081094197e-06, "loss": 0.0804, "step": 7294 }, { "epoch": 1.99, "grad_norm": 1.7390467726294139, "learning_rate": 2.682057251731249e-06, "loss": 0.0992, "step": 7295 }, { "epoch": 1.99, "grad_norm": 1.8714125602694576, "learning_rate": 2.68075159671137e-06, "loss": 0.1181, "step": 7296 }, { "epoch": 1.99, "grad_norm": 1.7170208327461447, "learning_rate": 2.67944614316321e-06, "loss": 0.1028, "step": 7297 }, { "epoch": 1.99, "grad_norm": 1.747307017217943, "learning_rate": 2.678140891200166e-06, "loss": 0.103, "step": 7298 }, { "epoch": 1.99, "grad_norm": 1.7633026272952508, "learning_rate": 2.67683584093563e-06, "loss": 0.1136, "step": 7299 }, { "epoch": 1.99, "grad_norm": 1.4899012096744266, "learning_rate": 2.6755309924829657e-06, "loss": 0.0832, "step": 7300 }, { "epoch": 1.99, "grad_norm": 2.0455858058549876, "learning_rate": 2.67422634595553e-06, "loss": 0.1199, "step": 7301 }, { "epoch": 1.99, "grad_norm": 1.7978889943576057, "learning_rate": 2.6729219014666525e-06, "loss": 0.1008, "step": 7302 }, { "epoch": 1.99, "grad_norm": 1.6481999397348024, "learning_rate": 2.671617659129655e-06, "loss": 0.0862, "step": 7303 }, { "epoch": 1.99, "grad_norm": 1.9390660617091382, "learning_rate": 2.6703136190578287e-06, "loss": 0.1134, "step": 7304 }, { "epoch": 1.99, "grad_norm": 1.617260294334028, "learning_rate": 2.6690097813644605e-06, "loss": 0.1002, "step": 7305 }, { "epoch": 1.99, "grad_norm": 1.7063717681422734, "learning_rate": 2.6677061461628107e-06, "loss": 0.107, "step": 7306 }, { "epoch": 1.99, "grad_norm": 1.7514628430293397, "learning_rate": 2.6664027135661276e-06, "loss": 0.1077, "step": 7307 }, { "epoch": 2.0, "grad_norm": 1.6180866602492607, "learning_rate": 2.6650994836876375e-06, "loss": 0.0987, "step": 7308 }, { "epoch": 2.0, "grad_norm": 1.5463802326109355, "learning_rate": 2.663796456640556e-06, "loss": 0.1033, "step": 7309 }, { "epoch": 2.0, "grad_norm": 1.6570079090517802, "learning_rate": 2.662493632538069e-06, "loss": 0.0999, "step": 7310 }, { "epoch": 2.0, "grad_norm": 1.8068238154761658, "learning_rate": 2.6611910114933574e-06, "loss": 0.1106, "step": 7311 }, { "epoch": 2.0, "grad_norm": 1.605067896762438, "learning_rate": 2.6598885936195764e-06, "loss": 0.0979, "step": 7312 }, { "epoch": 2.0, "grad_norm": 1.6824570821489555, "learning_rate": 2.658586379029868e-06, "loss": 0.108, "step": 7313 }, { "epoch": 2.0, "grad_norm": 1.4339261810768502, "learning_rate": 2.657284367837355e-06, "loss": 0.0803, "step": 7314 }, { "epoch": 2.0, "grad_norm": 1.6340442260029877, "learning_rate": 2.6559825601551408e-06, "loss": 0.0981, "step": 7315 }, { "epoch": 2.0, "grad_norm": 1.730102882418146, "learning_rate": 2.6546809560963116e-06, "loss": 0.0862, "step": 7316 }, { "epoch": 2.0, "grad_norm": 2.1579266581057506, "learning_rate": 2.6533795557739407e-06, "loss": 0.1177, "step": 7317 }, { "epoch": 2.0, "grad_norm": 1.9444952685341077, "learning_rate": 2.6520783593010757e-06, "loss": 0.1419, "step": 7318 }, { "epoch": 2.0, "grad_norm": 1.2848759468870636, "learning_rate": 2.6507773667907556e-06, "loss": 0.0662, "step": 7319 }, { "epoch": 2.0, "grad_norm": 1.5879438239999113, "learning_rate": 2.6494765783559933e-06, "loss": 0.0893, "step": 7320 }, { "epoch": 2.0, "grad_norm": 1.5386976159788832, "learning_rate": 2.648175994109789e-06, "loss": 0.0934, "step": 7321 }, { "epoch": 2.0, "grad_norm": 1.6614816690175895, "learning_rate": 2.646875614165121e-06, "loss": 0.1043, "step": 7322 }, { "epoch": 2.0, "grad_norm": 1.619953320479506, "learning_rate": 2.6455754386349564e-06, "loss": 0.0925, "step": 7323 }, { "epoch": 2.0, "grad_norm": 1.4722674062384284, "learning_rate": 2.6442754676322367e-06, "loss": 0.0805, "step": 7324 }, { "epoch": 2.0, "grad_norm": 1.4799403335139003, "learning_rate": 2.642975701269894e-06, "loss": 0.082, "step": 7325 }, { "epoch": 2.0, "grad_norm": 1.435261160281723, "learning_rate": 2.6416761396608365e-06, "loss": 0.0748, "step": 7326 }, { "epoch": 2.0, "grad_norm": 1.3568963328331998, "learning_rate": 2.6403767829179554e-06, "loss": 0.0637, "step": 7327 }, { "epoch": 2.0, "grad_norm": 1.2740169651542217, "learning_rate": 2.6390776311541233e-06, "loss": 0.0563, "step": 7328 }, { "epoch": 2.0, "grad_norm": 1.2567758384707397, "learning_rate": 2.6377786844822016e-06, "loss": 0.0563, "step": 7329 }, { "epoch": 2.0, "grad_norm": 1.3120807912587755, "learning_rate": 2.6364799430150233e-06, "loss": 0.065, "step": 7330 }, { "epoch": 2.0, "grad_norm": 1.217007382454917, "learning_rate": 2.635181406865415e-06, "loss": 0.0527, "step": 7331 }, { "epoch": 2.0, "grad_norm": 1.3872972378576565, "learning_rate": 2.6338830761461775e-06, "loss": 0.0723, "step": 7332 }, { "epoch": 2.0, "grad_norm": 1.4179324463064191, "learning_rate": 2.6325849509700952e-06, "loss": 0.0591, "step": 7333 }, { "epoch": 2.0, "grad_norm": 1.2798805250707819, "learning_rate": 2.6312870314499335e-06, "loss": 0.0588, "step": 7334 }, { "epoch": 2.0, "grad_norm": 1.1434480405905167, "learning_rate": 2.629989317698446e-06, "loss": 0.0476, "step": 7335 }, { "epoch": 2.0, "grad_norm": 1.1577792024372717, "learning_rate": 2.628691809828361e-06, "loss": 0.0471, "step": 7336 }, { "epoch": 2.0, "grad_norm": 1.4374254104606945, "learning_rate": 2.6273945079523955e-06, "loss": 0.0514, "step": 7337 }, { "epoch": 2.0, "grad_norm": 1.372018751991338, "learning_rate": 2.626097412183244e-06, "loss": 0.0537, "step": 7338 }, { "epoch": 2.0, "grad_norm": 1.4467715580539955, "learning_rate": 2.624800522633584e-06, "loss": 0.0585, "step": 7339 }, { "epoch": 2.0, "grad_norm": 1.387471906589724, "learning_rate": 2.623503839416073e-06, "loss": 0.0508, "step": 7340 }, { "epoch": 2.0, "grad_norm": 1.4256426230446908, "learning_rate": 2.6222073626433587e-06, "loss": 0.0522, "step": 7341 }, { "epoch": 2.0, "grad_norm": 1.6542905353343873, "learning_rate": 2.620911092428059e-06, "loss": 0.0556, "step": 7342 }, { "epoch": 2.0, "grad_norm": 1.6773813074210058, "learning_rate": 2.619615028882786e-06, "loss": 0.0514, "step": 7343 }, { "epoch": 2.0, "grad_norm": 1.408097651400215, "learning_rate": 2.618319172120125e-06, "loss": 0.0433, "step": 7344 }, { "epoch": 2.01, "grad_norm": 1.3240745528452251, "learning_rate": 2.6170235222526467e-06, "loss": 0.0438, "step": 7345 }, { "epoch": 2.01, "grad_norm": 1.4711598680949536, "learning_rate": 2.615728079392902e-06, "loss": 0.0449, "step": 7346 }, { "epoch": 2.01, "grad_norm": 1.8441296659576043, "learning_rate": 2.614432843653427e-06, "loss": 0.0701, "step": 7347 }, { "epoch": 2.01, "grad_norm": 1.6548224370969575, "learning_rate": 2.6131378151467367e-06, "loss": 0.0522, "step": 7348 }, { "epoch": 2.01, "grad_norm": 1.7068503164507207, "learning_rate": 2.6118429939853324e-06, "loss": 0.0565, "step": 7349 }, { "epoch": 2.01, "grad_norm": 1.6759990171934467, "learning_rate": 2.6105483802816922e-06, "loss": 0.0518, "step": 7350 }, { "epoch": 2.01, "grad_norm": 1.2740941025993895, "learning_rate": 2.609253974148278e-06, "loss": 0.0341, "step": 7351 }, { "epoch": 2.01, "grad_norm": 1.4751201330416444, "learning_rate": 2.6079597756975335e-06, "loss": 0.0536, "step": 7352 }, { "epoch": 2.01, "grad_norm": 1.688395100770885, "learning_rate": 2.6066657850418873e-06, "loss": 0.0605, "step": 7353 }, { "epoch": 2.01, "grad_norm": 1.591832562245072, "learning_rate": 2.6053720022937455e-06, "loss": 0.0475, "step": 7354 }, { "epoch": 2.01, "grad_norm": 1.5753955779859328, "learning_rate": 2.6040784275655008e-06, "loss": 0.0472, "step": 7355 }, { "epoch": 2.01, "grad_norm": 1.5256774696345954, "learning_rate": 2.6027850609695227e-06, "loss": 0.0523, "step": 7356 }, { "epoch": 2.01, "grad_norm": 1.617633273581408, "learning_rate": 2.601491902618167e-06, "loss": 0.0491, "step": 7357 }, { "epoch": 2.01, "grad_norm": 1.558444597630549, "learning_rate": 2.6001989526237658e-06, "loss": 0.0549, "step": 7358 }, { "epoch": 2.01, "grad_norm": 1.4077567773627562, "learning_rate": 2.598906211098643e-06, "loss": 0.0462, "step": 7359 }, { "epoch": 2.01, "grad_norm": 1.4776447362686629, "learning_rate": 2.597613678155092e-06, "loss": 0.0439, "step": 7360 }, { "epoch": 2.01, "grad_norm": 1.4333624272881615, "learning_rate": 2.5963213539054e-06, "loss": 0.0475, "step": 7361 }, { "epoch": 2.01, "grad_norm": 1.4189783014139743, "learning_rate": 2.595029238461827e-06, "loss": 0.0431, "step": 7362 }, { "epoch": 2.01, "grad_norm": 1.3922648000311357, "learning_rate": 2.59373733193662e-06, "loss": 0.0524, "step": 7363 }, { "epoch": 2.01, "grad_norm": 1.5123864265977387, "learning_rate": 2.592445634442003e-06, "loss": 0.0493, "step": 7364 }, { "epoch": 2.01, "grad_norm": 1.6550714555230197, "learning_rate": 2.591154146090189e-06, "loss": 0.0511, "step": 7365 }, { "epoch": 2.01, "grad_norm": 1.446265961660988, "learning_rate": 2.5898628669933657e-06, "loss": 0.0627, "step": 7366 }, { "epoch": 2.01, "grad_norm": 1.4225404011018752, "learning_rate": 2.588571797263708e-06, "loss": 0.0481, "step": 7367 }, { "epoch": 2.01, "grad_norm": 1.6611383821426202, "learning_rate": 2.5872809370133704e-06, "loss": 0.0529, "step": 7368 }, { "epoch": 2.01, "grad_norm": 1.3008208415512186, "learning_rate": 2.5859902863544884e-06, "loss": 0.0414, "step": 7369 }, { "epoch": 2.01, "grad_norm": 1.4238148948160205, "learning_rate": 2.5846998453991767e-06, "loss": 0.0513, "step": 7370 }, { "epoch": 2.01, "grad_norm": 1.4617546079497878, "learning_rate": 2.583409614259541e-06, "loss": 0.0476, "step": 7371 }, { "epoch": 2.01, "grad_norm": 1.0786328938246228, "learning_rate": 2.5821195930476584e-06, "loss": 0.0408, "step": 7372 }, { "epoch": 2.01, "grad_norm": 1.5300018109719198, "learning_rate": 2.5808297818755956e-06, "loss": 0.0494, "step": 7373 }, { "epoch": 2.01, "grad_norm": 1.1106590652643458, "learning_rate": 2.5795401808553966e-06, "loss": 0.0326, "step": 7374 }, { "epoch": 2.01, "grad_norm": 1.3793240569270968, "learning_rate": 2.5782507900990863e-06, "loss": 0.0535, "step": 7375 }, { "epoch": 2.01, "grad_norm": 1.4542917979982788, "learning_rate": 2.5769616097186757e-06, "loss": 0.0489, "step": 7376 }, { "epoch": 2.01, "grad_norm": 1.6487980627515233, "learning_rate": 2.575672639826153e-06, "loss": 0.0517, "step": 7377 }, { "epoch": 2.01, "grad_norm": 1.433688994609693, "learning_rate": 2.574383880533493e-06, "loss": 0.0438, "step": 7378 }, { "epoch": 2.01, "grad_norm": 1.5210258353656534, "learning_rate": 2.573095331952646e-06, "loss": 0.0485, "step": 7379 }, { "epoch": 2.01, "grad_norm": 1.4086045465835808, "learning_rate": 2.5718069941955535e-06, "loss": 0.046, "step": 7380 }, { "epoch": 2.02, "grad_norm": 2.2875572746444996, "learning_rate": 2.5705188673741253e-06, "loss": 0.057, "step": 7381 }, { "epoch": 2.02, "grad_norm": 1.4561133844636518, "learning_rate": 2.5692309516002643e-06, "loss": 0.0475, "step": 7382 }, { "epoch": 2.02, "grad_norm": 1.2815168270673225, "learning_rate": 2.567943246985849e-06, "loss": 0.0428, "step": 7383 }, { "epoch": 2.02, "grad_norm": 1.823361314780626, "learning_rate": 2.5666557536427445e-06, "loss": 0.0397, "step": 7384 }, { "epoch": 2.02, "grad_norm": 1.468079471269978, "learning_rate": 2.5653684716827904e-06, "loss": 0.0483, "step": 7385 }, { "epoch": 2.02, "grad_norm": 1.4127813118974655, "learning_rate": 2.5640814012178182e-06, "loss": 0.051, "step": 7386 }, { "epoch": 2.02, "grad_norm": 1.1636121526870367, "learning_rate": 2.562794542359628e-06, "loss": 0.0437, "step": 7387 }, { "epoch": 2.02, "grad_norm": 1.7615040124789034, "learning_rate": 2.5615078952200125e-06, "loss": 0.0605, "step": 7388 }, { "epoch": 2.02, "grad_norm": 1.6512237153234959, "learning_rate": 2.56022145991074e-06, "loss": 0.0523, "step": 7389 }, { "epoch": 2.02, "grad_norm": 1.6503977102223706, "learning_rate": 2.558935236543565e-06, "loss": 0.051, "step": 7390 }, { "epoch": 2.02, "grad_norm": 1.3603800327432807, "learning_rate": 2.557649225230219e-06, "loss": 0.0471, "step": 7391 }, { "epoch": 2.02, "grad_norm": 1.558341517709461, "learning_rate": 2.556363426082418e-06, "loss": 0.0518, "step": 7392 }, { "epoch": 2.02, "grad_norm": 1.41757345379561, "learning_rate": 2.5550778392118557e-06, "loss": 0.046, "step": 7393 }, { "epoch": 2.02, "grad_norm": 1.5505715332213015, "learning_rate": 2.5537924647302146e-06, "loss": 0.0597, "step": 7394 }, { "epoch": 2.02, "grad_norm": 1.6234831606488078, "learning_rate": 2.5525073027491504e-06, "loss": 0.0462, "step": 7395 }, { "epoch": 2.02, "grad_norm": 1.5554681912972133, "learning_rate": 2.5512223533803084e-06, "loss": 0.0442, "step": 7396 }, { "epoch": 2.02, "grad_norm": 1.6043505882955307, "learning_rate": 2.5499376167353097e-06, "loss": 0.0563, "step": 7397 }, { "epoch": 2.02, "grad_norm": 1.410214041468284, "learning_rate": 2.5486530929257574e-06, "loss": 0.0479, "step": 7398 }, { "epoch": 2.02, "grad_norm": 1.5013271592069959, "learning_rate": 2.5473687820632365e-06, "loss": 0.053, "step": 7399 }, { "epoch": 2.02, "grad_norm": 1.4422673286773653, "learning_rate": 2.546084684259318e-06, "loss": 0.0488, "step": 7400 }, { "epoch": 2.02, "grad_norm": 1.4239287133665368, "learning_rate": 2.5448007996255463e-06, "loss": 0.0504, "step": 7401 }, { "epoch": 2.02, "grad_norm": 1.383135698870463, "learning_rate": 2.5435171282734563e-06, "loss": 0.0419, "step": 7402 }, { "epoch": 2.02, "grad_norm": 1.4730217323645354, "learning_rate": 2.542233670314558e-06, "loss": 0.0547, "step": 7403 }, { "epoch": 2.02, "grad_norm": 1.4428264350338156, "learning_rate": 2.5409504258603436e-06, "loss": 0.0482, "step": 7404 }, { "epoch": 2.02, "grad_norm": 1.4679914670242349, "learning_rate": 2.5396673950222863e-06, "loss": 0.0509, "step": 7405 }, { "epoch": 2.02, "grad_norm": 1.6079100410897562, "learning_rate": 2.5383845779118453e-06, "loss": 0.0533, "step": 7406 }, { "epoch": 2.02, "grad_norm": 1.5992690830740246, "learning_rate": 2.5371019746404564e-06, "loss": 0.0535, "step": 7407 }, { "epoch": 2.02, "grad_norm": 1.509529496556637, "learning_rate": 2.535819585319541e-06, "loss": 0.0513, "step": 7408 }, { "epoch": 2.02, "grad_norm": 1.3454993188122852, "learning_rate": 2.534537410060497e-06, "loss": 0.0445, "step": 7409 }, { "epoch": 2.02, "grad_norm": 1.396632980280583, "learning_rate": 2.5332554489747076e-06, "loss": 0.0507, "step": 7410 }, { "epoch": 2.02, "grad_norm": 1.5057522271667847, "learning_rate": 2.531973702173533e-06, "loss": 0.0521, "step": 7411 }, { "epoch": 2.02, "grad_norm": 1.549303596754514, "learning_rate": 2.5306921697683216e-06, "loss": 0.0484, "step": 7412 }, { "epoch": 2.02, "grad_norm": 1.2432104638500188, "learning_rate": 2.529410851870397e-06, "loss": 0.0393, "step": 7413 }, { "epoch": 2.02, "grad_norm": 1.610559354259884, "learning_rate": 2.5281297485910684e-06, "loss": 0.0577, "step": 7414 }, { "epoch": 2.02, "grad_norm": 1.7158197403596251, "learning_rate": 2.526848860041624e-06, "loss": 0.0498, "step": 7415 }, { "epoch": 2.02, "grad_norm": 1.4251336752988741, "learning_rate": 2.5255681863333325e-06, "loss": 0.0469, "step": 7416 }, { "epoch": 2.02, "grad_norm": 1.6794591194943982, "learning_rate": 2.5242877275774446e-06, "loss": 0.0609, "step": 7417 }, { "epoch": 2.03, "grad_norm": 1.5752059402759766, "learning_rate": 2.523007483885196e-06, "loss": 0.0539, "step": 7418 }, { "epoch": 2.03, "grad_norm": 1.5811898924718548, "learning_rate": 2.5217274553677975e-06, "loss": 0.0482, "step": 7419 }, { "epoch": 2.03, "grad_norm": 1.4837178677679705, "learning_rate": 2.5204476421364475e-06, "loss": 0.0454, "step": 7420 }, { "epoch": 2.03, "grad_norm": 1.6456402158089958, "learning_rate": 2.5191680443023214e-06, "loss": 0.0581, "step": 7421 }, { "epoch": 2.03, "grad_norm": 1.4849872239333415, "learning_rate": 2.5178886619765764e-06, "loss": 0.0484, "step": 7422 }, { "epoch": 2.03, "grad_norm": 1.5699893075348668, "learning_rate": 2.516609495270351e-06, "loss": 0.0577, "step": 7423 }, { "epoch": 2.03, "grad_norm": 1.5577087552874045, "learning_rate": 2.515330544294768e-06, "loss": 0.053, "step": 7424 }, { "epoch": 2.03, "grad_norm": 1.4895696556941862, "learning_rate": 2.5140518091609254e-06, "loss": 0.0417, "step": 7425 }, { "epoch": 2.03, "grad_norm": 1.390785552920445, "learning_rate": 2.512773289979911e-06, "loss": 0.0425, "step": 7426 }, { "epoch": 2.03, "grad_norm": 1.4305359561950748, "learning_rate": 2.5114949868627867e-06, "loss": 0.0452, "step": 7427 }, { "epoch": 2.03, "grad_norm": 1.3933955787765868, "learning_rate": 2.510216899920598e-06, "loss": 0.0467, "step": 7428 }, { "epoch": 2.03, "grad_norm": 2.232316743659909, "learning_rate": 2.5089390292643686e-06, "loss": 0.0525, "step": 7429 }, { "epoch": 2.03, "grad_norm": 1.6024451789703997, "learning_rate": 2.5076613750051113e-06, "loss": 0.0496, "step": 7430 }, { "epoch": 2.03, "grad_norm": 1.5597350045047362, "learning_rate": 2.5063839372538112e-06, "loss": 0.0503, "step": 7431 }, { "epoch": 2.03, "grad_norm": 1.458295372823315, "learning_rate": 2.5051067161214414e-06, "loss": 0.0515, "step": 7432 }, { "epoch": 2.03, "grad_norm": 1.559836412126493, "learning_rate": 2.5038297117189535e-06, "loss": 0.052, "step": 7433 }, { "epoch": 2.03, "grad_norm": 1.5286929700263967, "learning_rate": 2.502552924157278e-06, "loss": 0.0468, "step": 7434 }, { "epoch": 2.03, "grad_norm": 1.5490301309509895, "learning_rate": 2.501276353547327e-06, "loss": 0.0479, "step": 7435 }, { "epoch": 2.03, "grad_norm": 1.344690950141608, "learning_rate": 2.5000000000000015e-06, "loss": 0.0446, "step": 7436 }, { "epoch": 2.03, "grad_norm": 1.6782629746954085, "learning_rate": 2.4987238636261705e-06, "loss": 0.0566, "step": 7437 }, { "epoch": 2.03, "grad_norm": 1.7506567297002045, "learning_rate": 2.4974479445366973e-06, "loss": 0.0535, "step": 7438 }, { "epoch": 2.03, "grad_norm": 1.595440966679077, "learning_rate": 2.4961722428424177e-06, "loss": 0.04, "step": 7439 }, { "epoch": 2.03, "grad_norm": 1.648204419915552, "learning_rate": 2.4948967586541508e-06, "loss": 0.0525, "step": 7440 }, { "epoch": 2.03, "grad_norm": 1.6446411292681524, "learning_rate": 2.4936214920826956e-06, "loss": 0.0484, "step": 7441 }, { "epoch": 2.03, "grad_norm": 1.3760623284832816, "learning_rate": 2.4923464432388373e-06, "loss": 0.0448, "step": 7442 }, { "epoch": 2.03, "grad_norm": 1.643020447031329, "learning_rate": 2.4910716122333352e-06, "loss": 0.0496, "step": 7443 }, { "epoch": 2.03, "grad_norm": 1.3621315461022658, "learning_rate": 2.489796999176936e-06, "loss": 0.0428, "step": 7444 }, { "epoch": 2.03, "grad_norm": 1.474105265027352, "learning_rate": 2.488522604180364e-06, "loss": 0.0462, "step": 7445 }, { "epoch": 2.03, "grad_norm": 1.478295408482525, "learning_rate": 2.487248427354324e-06, "loss": 0.0517, "step": 7446 }, { "epoch": 2.03, "grad_norm": 1.535593749311935, "learning_rate": 2.4859744688095015e-06, "loss": 0.0492, "step": 7447 }, { "epoch": 2.03, "grad_norm": 1.601177095502935, "learning_rate": 2.484700728656569e-06, "loss": 0.0477, "step": 7448 }, { "epoch": 2.03, "grad_norm": 1.669743354640075, "learning_rate": 2.4834272070061706e-06, "loss": 0.0462, "step": 7449 }, { "epoch": 2.03, "grad_norm": 1.4411696460978156, "learning_rate": 2.4821539039689404e-06, "loss": 0.0427, "step": 7450 }, { "epoch": 2.03, "grad_norm": 1.6604570465520747, "learning_rate": 2.4808808196554877e-06, "loss": 0.0532, "step": 7451 }, { "epoch": 2.03, "grad_norm": 1.6137018233754035, "learning_rate": 2.479607954176406e-06, "loss": 0.0576, "step": 7452 }, { "epoch": 2.03, "grad_norm": 1.565220772210761, "learning_rate": 2.478335307642264e-06, "loss": 0.0485, "step": 7453 }, { "epoch": 2.03, "grad_norm": 1.4392445272742047, "learning_rate": 2.4770628801636205e-06, "loss": 0.0428, "step": 7454 }, { "epoch": 2.04, "grad_norm": 1.8259654542720174, "learning_rate": 2.475790671851007e-06, "loss": 0.0605, "step": 7455 }, { "epoch": 2.04, "grad_norm": 1.343402316311559, "learning_rate": 2.4745186828149435e-06, "loss": 0.0485, "step": 7456 }, { "epoch": 2.04, "grad_norm": 1.6412224725789413, "learning_rate": 2.473246913165925e-06, "loss": 0.0472, "step": 7457 }, { "epoch": 2.04, "grad_norm": 1.394987341594411, "learning_rate": 2.4719753630144283e-06, "loss": 0.0538, "step": 7458 }, { "epoch": 2.04, "grad_norm": 1.4981573299034243, "learning_rate": 2.4707040324709115e-06, "loss": 0.0503, "step": 7459 }, { "epoch": 2.04, "grad_norm": 1.3451634809157227, "learning_rate": 2.469432921645818e-06, "loss": 0.0442, "step": 7460 }, { "epoch": 2.04, "grad_norm": 1.4369916909061546, "learning_rate": 2.4681620306495634e-06, "loss": 0.0426, "step": 7461 }, { "epoch": 2.04, "grad_norm": 1.4846313308218781, "learning_rate": 2.4668913595925548e-06, "loss": 0.0521, "step": 7462 }, { "epoch": 2.04, "grad_norm": 1.277636252275021, "learning_rate": 2.4656209085851712e-06, "loss": 0.0428, "step": 7463 }, { "epoch": 2.04, "grad_norm": 1.3098100017505718, "learning_rate": 2.464350677737777e-06, "loss": 0.0391, "step": 7464 }, { "epoch": 2.04, "grad_norm": 1.427784442839232, "learning_rate": 2.463080667160714e-06, "loss": 0.0516, "step": 7465 }, { "epoch": 2.04, "grad_norm": 1.696646045226093, "learning_rate": 2.4618108769643105e-06, "loss": 0.054, "step": 7466 }, { "epoch": 2.04, "grad_norm": 1.5385584582936893, "learning_rate": 2.4605413072588702e-06, "loss": 0.0439, "step": 7467 }, { "epoch": 2.04, "grad_norm": 1.6374315347955912, "learning_rate": 2.4592719581546826e-06, "loss": 0.0505, "step": 7468 }, { "epoch": 2.04, "grad_norm": 1.6202139145784817, "learning_rate": 2.458002829762013e-06, "loss": 0.0502, "step": 7469 }, { "epoch": 2.04, "grad_norm": 1.6485678714691454, "learning_rate": 2.4567339221911086e-06, "loss": 0.0524, "step": 7470 }, { "epoch": 2.04, "grad_norm": 1.7735600327315095, "learning_rate": 2.455465235552202e-06, "loss": 0.0625, "step": 7471 }, { "epoch": 2.04, "grad_norm": 1.492149330739676, "learning_rate": 2.4541967699555004e-06, "loss": 0.0392, "step": 7472 }, { "epoch": 2.04, "grad_norm": 1.4075805997822857, "learning_rate": 2.4529285255111974e-06, "loss": 0.0462, "step": 7473 }, { "epoch": 2.04, "grad_norm": 1.5386038381665181, "learning_rate": 2.4516605023294626e-06, "loss": 0.0466, "step": 7474 }, { "epoch": 2.04, "grad_norm": 1.367320301506333, "learning_rate": 2.4503927005204497e-06, "loss": 0.045, "step": 7475 }, { "epoch": 2.04, "grad_norm": 1.659796747824848, "learning_rate": 2.4491251201942882e-06, "loss": 0.0494, "step": 7476 }, { "epoch": 2.04, "grad_norm": 1.527278945743993, "learning_rate": 2.4478577614610975e-06, "loss": 0.0508, "step": 7477 }, { "epoch": 2.04, "grad_norm": 1.4836560247064783, "learning_rate": 2.4465906244309677e-06, "loss": 0.0416, "step": 7478 }, { "epoch": 2.04, "grad_norm": 1.4239991068320188, "learning_rate": 2.445323709213978e-06, "loss": 0.041, "step": 7479 }, { "epoch": 2.04, "grad_norm": 1.226835209340665, "learning_rate": 2.444057015920183e-06, "loss": 0.039, "step": 7480 }, { "epoch": 2.04, "grad_norm": 1.5524655882130292, "learning_rate": 2.4427905446596194e-06, "loss": 0.0438, "step": 7481 }, { "epoch": 2.04, "grad_norm": 1.3302898912936039, "learning_rate": 2.441524295542303e-06, "loss": 0.0455, "step": 7482 }, { "epoch": 2.04, "grad_norm": 1.4749669997387318, "learning_rate": 2.4402582686782354e-06, "loss": 0.0528, "step": 7483 }, { "epoch": 2.04, "grad_norm": 1.6239266235656735, "learning_rate": 2.4389924641773925e-06, "loss": 0.0541, "step": 7484 }, { "epoch": 2.04, "grad_norm": 1.7063224063434792, "learning_rate": 2.4377268821497375e-06, "loss": 0.0518, "step": 7485 }, { "epoch": 2.04, "grad_norm": 1.5313408510321915, "learning_rate": 2.4364615227052086e-06, "loss": 0.0488, "step": 7486 }, { "epoch": 2.04, "grad_norm": 1.4511069205582734, "learning_rate": 2.435196385953727e-06, "loss": 0.043, "step": 7487 }, { "epoch": 2.04, "grad_norm": 1.3635326906107355, "learning_rate": 2.4339314720051927e-06, "loss": 0.0445, "step": 7488 }, { "epoch": 2.04, "grad_norm": 1.63884359841843, "learning_rate": 2.432666780969491e-06, "loss": 0.0471, "step": 7489 }, { "epoch": 2.04, "grad_norm": 1.5528716654981882, "learning_rate": 2.4314023129564824e-06, "loss": 0.0459, "step": 7490 }, { "epoch": 2.05, "grad_norm": 1.5044901073500467, "learning_rate": 2.430138068076013e-06, "loss": 0.0505, "step": 7491 }, { "epoch": 2.05, "grad_norm": 1.3804834922620526, "learning_rate": 2.4288740464379057e-06, "loss": 0.055, "step": 7492 }, { "epoch": 2.05, "grad_norm": 1.5485970606536095, "learning_rate": 2.4276102481519655e-06, "loss": 0.055, "step": 7493 }, { "epoch": 2.05, "grad_norm": 1.8198067453404883, "learning_rate": 2.4263466733279756e-06, "loss": 0.0538, "step": 7494 }, { "epoch": 2.05, "grad_norm": 1.248492221541558, "learning_rate": 2.4250833220757054e-06, "loss": 0.0402, "step": 7495 }, { "epoch": 2.05, "grad_norm": 1.393824966633775, "learning_rate": 2.4238201945048983e-06, "loss": 0.0433, "step": 7496 }, { "epoch": 2.05, "grad_norm": 1.8301016736329818, "learning_rate": 2.4225572907252853e-06, "loss": 0.0654, "step": 7497 }, { "epoch": 2.05, "grad_norm": 1.6364757796590972, "learning_rate": 2.421294610846571e-06, "loss": 0.0634, "step": 7498 }, { "epoch": 2.05, "grad_norm": 1.4973883996622943, "learning_rate": 2.4200321549784455e-06, "loss": 0.0544, "step": 7499 }, { "epoch": 2.05, "grad_norm": 1.4207472298863366, "learning_rate": 2.4187699232305745e-06, "loss": 0.0505, "step": 7500 }, { "epoch": 2.05, "grad_norm": 1.639412770186421, "learning_rate": 2.4175079157126115e-06, "loss": 0.0556, "step": 7501 }, { "epoch": 2.05, "grad_norm": 1.5859266317595415, "learning_rate": 2.4162461325341816e-06, "loss": 0.0483, "step": 7502 }, { "epoch": 2.05, "grad_norm": 1.688275537229775, "learning_rate": 2.4149845738049007e-06, "loss": 0.0532, "step": 7503 }, { "epoch": 2.05, "grad_norm": 1.462156957982611, "learning_rate": 2.413723239634356e-06, "loss": 0.0446, "step": 7504 }, { "epoch": 2.05, "grad_norm": 1.3320981125435771, "learning_rate": 2.41246213013212e-06, "loss": 0.0477, "step": 7505 }, { "epoch": 2.05, "grad_norm": 1.496831387504434, "learning_rate": 2.4112012454077422e-06, "loss": 0.0528, "step": 7506 }, { "epoch": 2.05, "grad_norm": 1.4519157371185114, "learning_rate": 2.4099405855707585e-06, "loss": 0.0537, "step": 7507 }, { "epoch": 2.05, "grad_norm": 1.519177619896096, "learning_rate": 2.4086801507306783e-06, "loss": 0.0439, "step": 7508 }, { "epoch": 2.05, "grad_norm": 1.4687177155402915, "learning_rate": 2.4074199409969984e-06, "loss": 0.0471, "step": 7509 }, { "epoch": 2.05, "grad_norm": 1.5134619785816281, "learning_rate": 2.4061599564791906e-06, "loss": 0.0478, "step": 7510 }, { "epoch": 2.05, "grad_norm": 1.3224667379931698, "learning_rate": 2.4049001972867086e-06, "loss": 0.0421, "step": 7511 }, { "epoch": 2.05, "grad_norm": 1.6547161636983905, "learning_rate": 2.403640663528986e-06, "loss": 0.062, "step": 7512 }, { "epoch": 2.05, "grad_norm": 1.279436450402271, "learning_rate": 2.402381355315441e-06, "loss": 0.0402, "step": 7513 }, { "epoch": 2.05, "grad_norm": 1.6079417964693181, "learning_rate": 2.401122272755464e-06, "loss": 0.053, "step": 7514 }, { "epoch": 2.05, "grad_norm": 1.604471758190186, "learning_rate": 2.3998634159584365e-06, "loss": 0.0456, "step": 7515 }, { "epoch": 2.05, "grad_norm": 1.4442918534026248, "learning_rate": 2.398604785033712e-06, "loss": 0.0543, "step": 7516 }, { "epoch": 2.05, "grad_norm": 1.2814003626851682, "learning_rate": 2.397346380090626e-06, "loss": 0.0443, "step": 7517 }, { "epoch": 2.05, "grad_norm": 1.3672364848158798, "learning_rate": 2.396088201238495e-06, "loss": 0.0494, "step": 7518 }, { "epoch": 2.05, "grad_norm": 1.2890424896285348, "learning_rate": 2.3948302485866194e-06, "loss": 0.0399, "step": 7519 }, { "epoch": 2.05, "grad_norm": 1.623200219523274, "learning_rate": 2.3935725222442728e-06, "loss": 0.0478, "step": 7520 }, { "epoch": 2.05, "grad_norm": 1.4165671444426533, "learning_rate": 2.3923150223207176e-06, "loss": 0.0396, "step": 7521 }, { "epoch": 2.05, "grad_norm": 1.5192968090708392, "learning_rate": 2.391057748925189e-06, "loss": 0.0501, "step": 7522 }, { "epoch": 2.05, "grad_norm": 1.6285446009097, "learning_rate": 2.3898007021669068e-06, "loss": 0.0498, "step": 7523 }, { "epoch": 2.05, "grad_norm": 1.627294506939516, "learning_rate": 2.388543882155067e-06, "loss": 0.0551, "step": 7524 }, { "epoch": 2.05, "grad_norm": 2.079491390926197, "learning_rate": 2.3872872889988535e-06, "loss": 0.0694, "step": 7525 }, { "epoch": 2.05, "grad_norm": 1.8523640841048559, "learning_rate": 2.3860309228074213e-06, "loss": 0.0613, "step": 7526 }, { "epoch": 2.05, "grad_norm": 1.4936739729095836, "learning_rate": 2.3847747836899144e-06, "loss": 0.0455, "step": 7527 }, { "epoch": 2.06, "grad_norm": 1.387453667191235, "learning_rate": 2.383518871755451e-06, "loss": 0.0471, "step": 7528 }, { "epoch": 2.06, "grad_norm": 1.5305233749771534, "learning_rate": 2.3822631871131306e-06, "loss": 0.0566, "step": 7529 }, { "epoch": 2.06, "grad_norm": 1.4275702727064148, "learning_rate": 2.381007729872033e-06, "loss": 0.0433, "step": 7530 }, { "epoch": 2.06, "grad_norm": 1.2058282063974046, "learning_rate": 2.379752500141222e-06, "loss": 0.0417, "step": 7531 }, { "epoch": 2.06, "grad_norm": 1.5051223681557075, "learning_rate": 2.378497498029735e-06, "loss": 0.0462, "step": 7532 }, { "epoch": 2.06, "grad_norm": 1.399590767431314, "learning_rate": 2.3772427236465974e-06, "loss": 0.0518, "step": 7533 }, { "epoch": 2.06, "grad_norm": 1.4756301228571063, "learning_rate": 2.3759881771008088e-06, "loss": 0.0474, "step": 7534 }, { "epoch": 2.06, "grad_norm": 1.425308484059849, "learning_rate": 2.37473385850135e-06, "loss": 0.0497, "step": 7535 }, { "epoch": 2.06, "grad_norm": 1.6695545738068702, "learning_rate": 2.3734797679571826e-06, "loss": 0.0515, "step": 7536 }, { "epoch": 2.06, "grad_norm": 1.56954603327496, "learning_rate": 2.372225905577251e-06, "loss": 0.048, "step": 7537 }, { "epoch": 2.06, "grad_norm": 1.4803808616726128, "learning_rate": 2.370972271470475e-06, "loss": 0.0467, "step": 7538 }, { "epoch": 2.06, "grad_norm": 1.624043540279434, "learning_rate": 2.3697188657457592e-06, "loss": 0.061, "step": 7539 }, { "epoch": 2.06, "grad_norm": 2.028672661928012, "learning_rate": 2.3684656885119856e-06, "loss": 0.0586, "step": 7540 }, { "epoch": 2.06, "grad_norm": 1.195633559454792, "learning_rate": 2.367212739878017e-06, "loss": 0.0416, "step": 7541 }, { "epoch": 2.06, "grad_norm": 1.5847414277539829, "learning_rate": 2.3659600199526933e-06, "loss": 0.0484, "step": 7542 }, { "epoch": 2.06, "grad_norm": 1.4604825290204053, "learning_rate": 2.3647075288448423e-06, "loss": 0.0499, "step": 7543 }, { "epoch": 2.06, "grad_norm": 1.3517427438559508, "learning_rate": 2.3634552666632633e-06, "loss": 0.0442, "step": 7544 }, { "epoch": 2.06, "grad_norm": 1.634010794142163, "learning_rate": 2.362203233516743e-06, "loss": 0.0635, "step": 7545 }, { "epoch": 2.06, "grad_norm": 1.395742698915889, "learning_rate": 2.360951429514043e-06, "loss": 0.0481, "step": 7546 }, { "epoch": 2.06, "grad_norm": 1.4699971577633688, "learning_rate": 2.3596998547639066e-06, "loss": 0.0493, "step": 7547 }, { "epoch": 2.06, "grad_norm": 1.5654270394048593, "learning_rate": 2.3584485093750554e-06, "loss": 0.0429, "step": 7548 }, { "epoch": 2.06, "grad_norm": 1.5530620315914359, "learning_rate": 2.3571973934561978e-06, "loss": 0.0551, "step": 7549 }, { "epoch": 2.06, "grad_norm": 1.547121438041144, "learning_rate": 2.355946507116012e-06, "loss": 0.0509, "step": 7550 }, { "epoch": 2.06, "grad_norm": 1.3625585103568778, "learning_rate": 2.3546958504631666e-06, "loss": 0.0326, "step": 7551 }, { "epoch": 2.06, "grad_norm": 1.596851983142129, "learning_rate": 2.3534454236063036e-06, "loss": 0.0529, "step": 7552 }, { "epoch": 2.06, "grad_norm": 1.457607478887334, "learning_rate": 2.3521952266540466e-06, "loss": 0.0431, "step": 7553 }, { "epoch": 2.06, "grad_norm": 1.5848549870324236, "learning_rate": 2.3509452597149972e-06, "loss": 0.0514, "step": 7554 }, { "epoch": 2.06, "grad_norm": 1.5093190023146514, "learning_rate": 2.3496955228977437e-06, "loss": 0.0512, "step": 7555 }, { "epoch": 2.06, "grad_norm": 1.4306186650122474, "learning_rate": 2.3484460163108457e-06, "loss": 0.0518, "step": 7556 }, { "epoch": 2.06, "grad_norm": 1.6820046827272304, "learning_rate": 2.3471967400628513e-06, "loss": 0.0543, "step": 7557 }, { "epoch": 2.06, "grad_norm": 1.8762232605375888, "learning_rate": 2.3459476942622823e-06, "loss": 0.0578, "step": 7558 }, { "epoch": 2.06, "grad_norm": 1.4650275461258537, "learning_rate": 2.3446988790176425e-06, "loss": 0.0432, "step": 7559 }, { "epoch": 2.06, "grad_norm": 1.279400585314781, "learning_rate": 2.3434502944374137e-06, "loss": 0.0413, "step": 7560 }, { "epoch": 2.06, "grad_norm": 1.401065587262593, "learning_rate": 2.3422019406300617e-06, "loss": 0.0431, "step": 7561 }, { "epoch": 2.06, "grad_norm": 1.7711106598792907, "learning_rate": 2.3409538177040324e-06, "loss": 0.0426, "step": 7562 }, { "epoch": 2.06, "grad_norm": 1.5488316766119747, "learning_rate": 2.339705925767747e-06, "loss": 0.0441, "step": 7563 }, { "epoch": 2.06, "grad_norm": 1.4892965854736837, "learning_rate": 2.3384582649296093e-06, "loss": 0.0492, "step": 7564 }, { "epoch": 2.07, "grad_norm": 1.3209549733764223, "learning_rate": 2.337210835298002e-06, "loss": 0.0411, "step": 7565 }, { "epoch": 2.07, "grad_norm": 1.506860795917665, "learning_rate": 2.335963636981291e-06, "loss": 0.0535, "step": 7566 }, { "epoch": 2.07, "grad_norm": 1.4824930722399863, "learning_rate": 2.3347166700878165e-06, "loss": 0.0435, "step": 7567 }, { "epoch": 2.07, "grad_norm": 1.5624416421719172, "learning_rate": 2.3334699347259053e-06, "loss": 0.0478, "step": 7568 }, { "epoch": 2.07, "grad_norm": 1.469282830944571, "learning_rate": 2.332223431003859e-06, "loss": 0.0348, "step": 7569 }, { "epoch": 2.07, "grad_norm": 1.6469403463427092, "learning_rate": 2.33097715902996e-06, "loss": 0.0562, "step": 7570 }, { "epoch": 2.07, "grad_norm": 1.6192635478897062, "learning_rate": 2.32973111891247e-06, "loss": 0.0451, "step": 7571 }, { "epoch": 2.07, "grad_norm": 1.4604251326654094, "learning_rate": 2.328485310759635e-06, "loss": 0.0466, "step": 7572 }, { "epoch": 2.07, "grad_norm": 1.3994967606709425, "learning_rate": 2.3272397346796743e-06, "loss": 0.0422, "step": 7573 }, { "epoch": 2.07, "grad_norm": 1.4450697974333713, "learning_rate": 2.325994390780794e-06, "loss": 0.051, "step": 7574 }, { "epoch": 2.07, "grad_norm": 1.346727976837846, "learning_rate": 2.3247492791711744e-06, "loss": 0.0439, "step": 7575 }, { "epoch": 2.07, "grad_norm": 1.748942646475676, "learning_rate": 2.323504399958978e-06, "loss": 0.0541, "step": 7576 }, { "epoch": 2.07, "grad_norm": 1.3682559555317957, "learning_rate": 2.322259753252344e-06, "loss": 0.0444, "step": 7577 }, { "epoch": 2.07, "grad_norm": 1.571304413620297, "learning_rate": 2.3210153391593978e-06, "loss": 0.0428, "step": 7578 }, { "epoch": 2.07, "grad_norm": 1.4523939815908802, "learning_rate": 2.319771157788238e-06, "loss": 0.0552, "step": 7579 }, { "epoch": 2.07, "grad_norm": 1.943102107899454, "learning_rate": 2.3185272092469497e-06, "loss": 0.057, "step": 7580 }, { "epoch": 2.07, "grad_norm": 1.450829826275882, "learning_rate": 2.3172834936435913e-06, "loss": 0.0492, "step": 7581 }, { "epoch": 2.07, "grad_norm": 1.5851933402831677, "learning_rate": 2.316040011086204e-06, "loss": 0.0517, "step": 7582 }, { "epoch": 2.07, "grad_norm": 1.589527550328945, "learning_rate": 2.3147967616828067e-06, "loss": 0.045, "step": 7583 }, { "epoch": 2.07, "grad_norm": 1.2777718755646204, "learning_rate": 2.313553745541403e-06, "loss": 0.037, "step": 7584 }, { "epoch": 2.07, "grad_norm": 1.4591973466812223, "learning_rate": 2.3123109627699695e-06, "loss": 0.0506, "step": 7585 }, { "epoch": 2.07, "grad_norm": 1.502324905492929, "learning_rate": 2.31106841347647e-06, "loss": 0.0501, "step": 7586 }, { "epoch": 2.07, "grad_norm": 1.5512376524106535, "learning_rate": 2.3098260977688412e-06, "loss": 0.0511, "step": 7587 }, { "epoch": 2.07, "grad_norm": 1.5601547479680107, "learning_rate": 2.3085840157550036e-06, "loss": 0.0558, "step": 7588 }, { "epoch": 2.07, "grad_norm": 1.7306740814267172, "learning_rate": 2.307342167542854e-06, "loss": 0.0551, "step": 7589 }, { "epoch": 2.07, "grad_norm": 1.492895469026317, "learning_rate": 2.306100553240274e-06, "loss": 0.0408, "step": 7590 }, { "epoch": 2.07, "grad_norm": 1.6068578787089192, "learning_rate": 2.3048591729551184e-06, "loss": 0.0489, "step": 7591 }, { "epoch": 2.07, "grad_norm": 1.5158334598622607, "learning_rate": 2.303618026795229e-06, "loss": 0.0439, "step": 7592 }, { "epoch": 2.07, "grad_norm": 1.5983196453549129, "learning_rate": 2.302377114868422e-06, "loss": 0.04, "step": 7593 }, { "epoch": 2.07, "grad_norm": 1.7551625050848039, "learning_rate": 2.301136437282494e-06, "loss": 0.0456, "step": 7594 }, { "epoch": 2.07, "grad_norm": 1.6857404180284448, "learning_rate": 2.2998959941452203e-06, "loss": 0.0612, "step": 7595 }, { "epoch": 2.07, "grad_norm": 1.400021033123697, "learning_rate": 2.2986557855643617e-06, "loss": 0.0443, "step": 7596 }, { "epoch": 2.07, "grad_norm": 1.8589435909544976, "learning_rate": 2.297415811647649e-06, "loss": 0.064, "step": 7597 }, { "epoch": 2.07, "grad_norm": 1.4273731790347688, "learning_rate": 2.2961760725028036e-06, "loss": 0.0459, "step": 7598 }, { "epoch": 2.07, "grad_norm": 1.5788197167286022, "learning_rate": 2.2949365682375185e-06, "loss": 0.0499, "step": 7599 }, { "epoch": 2.07, "grad_norm": 1.472154848246478, "learning_rate": 2.2936972989594684e-06, "loss": 0.0549, "step": 7600 }, { "epoch": 2.08, "grad_norm": 1.3366290195493562, "learning_rate": 2.292458264776306e-06, "loss": 0.0383, "step": 7601 }, { "epoch": 2.08, "grad_norm": 1.675983968477484, "learning_rate": 2.291219465795669e-06, "loss": 0.0511, "step": 7602 }, { "epoch": 2.08, "grad_norm": 1.544732135812631, "learning_rate": 2.289980902125168e-06, "loss": 0.0524, "step": 7603 }, { "epoch": 2.08, "grad_norm": 1.376764389317635, "learning_rate": 2.2887425738723994e-06, "loss": 0.0487, "step": 7604 }, { "epoch": 2.08, "grad_norm": 1.7467665709834814, "learning_rate": 2.2875044811449347e-06, "loss": 0.0565, "step": 7605 }, { "epoch": 2.08, "grad_norm": 1.3727162744176389, "learning_rate": 2.286266624050326e-06, "loss": 0.0439, "step": 7606 }, { "epoch": 2.08, "grad_norm": 1.7540345273310114, "learning_rate": 2.2850290026961032e-06, "loss": 0.0544, "step": 7607 }, { "epoch": 2.08, "grad_norm": 1.4530033901467136, "learning_rate": 2.2837916171897816e-06, "loss": 0.0465, "step": 7608 }, { "epoch": 2.08, "grad_norm": 1.518431323614025, "learning_rate": 2.282554467638849e-06, "loss": 0.0463, "step": 7609 }, { "epoch": 2.08, "grad_norm": 1.4532104231630625, "learning_rate": 2.2813175541507782e-06, "loss": 0.0459, "step": 7610 }, { "epoch": 2.08, "grad_norm": 1.4851585783861172, "learning_rate": 2.2800808768330184e-06, "loss": 0.0438, "step": 7611 }, { "epoch": 2.08, "grad_norm": 1.8190002962686183, "learning_rate": 2.278844435792998e-06, "loss": 0.0524, "step": 7612 }, { "epoch": 2.08, "grad_norm": 1.42588456768309, "learning_rate": 2.277608231138126e-06, "loss": 0.046, "step": 7613 }, { "epoch": 2.08, "grad_norm": 1.2059015319087816, "learning_rate": 2.2763722629757924e-06, "loss": 0.0373, "step": 7614 }, { "epoch": 2.08, "grad_norm": 1.364858285271689, "learning_rate": 2.2751365314133623e-06, "loss": 0.0408, "step": 7615 }, { "epoch": 2.08, "grad_norm": 1.289317735072688, "learning_rate": 2.2739010365581866e-06, "loss": 0.0334, "step": 7616 }, { "epoch": 2.08, "grad_norm": 1.4027579949343196, "learning_rate": 2.2726657785175892e-06, "loss": 0.0493, "step": 7617 }, { "epoch": 2.08, "grad_norm": 1.3856619102350474, "learning_rate": 2.2714307573988776e-06, "loss": 0.0415, "step": 7618 }, { "epoch": 2.08, "grad_norm": 1.617443408700295, "learning_rate": 2.2701959733093347e-06, "loss": 0.0497, "step": 7619 }, { "epoch": 2.08, "grad_norm": 1.5006837865932632, "learning_rate": 2.2689614263562297e-06, "loss": 0.0435, "step": 7620 }, { "epoch": 2.08, "grad_norm": 1.460853769798232, "learning_rate": 2.2677271166468024e-06, "loss": 0.0432, "step": 7621 }, { "epoch": 2.08, "grad_norm": 1.7579801425646928, "learning_rate": 2.266493044288281e-06, "loss": 0.0617, "step": 7622 }, { "epoch": 2.08, "grad_norm": 1.599145519630225, "learning_rate": 2.265259209387867e-06, "loss": 0.0482, "step": 7623 }, { "epoch": 2.08, "grad_norm": 1.46630507774899, "learning_rate": 2.2640256120527413e-06, "loss": 0.0502, "step": 7624 }, { "epoch": 2.08, "grad_norm": 1.3967425420194994, "learning_rate": 2.262792252390066e-06, "loss": 0.0418, "step": 7625 }, { "epoch": 2.08, "grad_norm": 1.775530077724054, "learning_rate": 2.2615591305069846e-06, "loss": 0.0568, "step": 7626 }, { "epoch": 2.08, "grad_norm": 1.3429775183850952, "learning_rate": 2.2603262465106147e-06, "loss": 0.0361, "step": 7627 }, { "epoch": 2.08, "grad_norm": 1.592797036798222, "learning_rate": 2.2590936005080594e-06, "loss": 0.0573, "step": 7628 }, { "epoch": 2.08, "grad_norm": 1.4405335971063797, "learning_rate": 2.257861192606396e-06, "loss": 0.0481, "step": 7629 }, { "epoch": 2.08, "grad_norm": 1.3926611469973187, "learning_rate": 2.2566290229126837e-06, "loss": 0.0403, "step": 7630 }, { "epoch": 2.08, "grad_norm": 1.4336549257731726, "learning_rate": 2.255397091533958e-06, "loss": 0.0453, "step": 7631 }, { "epoch": 2.08, "grad_norm": 1.4193505176959316, "learning_rate": 2.2541653985772394e-06, "loss": 0.0486, "step": 7632 }, { "epoch": 2.08, "grad_norm": 1.4782932694553221, "learning_rate": 2.252933944149522e-06, "loss": 0.0379, "step": 7633 }, { "epoch": 2.08, "grad_norm": 1.343700709155917, "learning_rate": 2.251702728357783e-06, "loss": 0.0437, "step": 7634 }, { "epoch": 2.08, "grad_norm": 1.293332829306894, "learning_rate": 2.2504717513089773e-06, "loss": 0.0367, "step": 7635 }, { "epoch": 2.08, "grad_norm": 1.3029671763377195, "learning_rate": 2.249241013110039e-06, "loss": 0.0425, "step": 7636 }, { "epoch": 2.08, "grad_norm": 1.7098572515457942, "learning_rate": 2.248010513867879e-06, "loss": 0.0481, "step": 7637 }, { "epoch": 2.09, "grad_norm": 1.403924983273616, "learning_rate": 2.246780253689394e-06, "loss": 0.0471, "step": 7638 }, { "epoch": 2.09, "grad_norm": 1.781435672744397, "learning_rate": 2.245550232681453e-06, "loss": 0.0529, "step": 7639 }, { "epoch": 2.09, "grad_norm": 1.3709736956694176, "learning_rate": 2.2443204509509094e-06, "loss": 0.0411, "step": 7640 }, { "epoch": 2.09, "grad_norm": 1.6308684174630381, "learning_rate": 2.243090908604593e-06, "loss": 0.0447, "step": 7641 }, { "epoch": 2.09, "grad_norm": 1.3301624465146882, "learning_rate": 2.2418616057493125e-06, "loss": 0.0463, "step": 7642 }, { "epoch": 2.09, "grad_norm": 1.8268856860764175, "learning_rate": 2.2406325424918562e-06, "loss": 0.0662, "step": 7643 }, { "epoch": 2.09, "grad_norm": 1.3916864802324946, "learning_rate": 2.2394037189389943e-06, "loss": 0.0463, "step": 7644 }, { "epoch": 2.09, "grad_norm": 1.3999831368461508, "learning_rate": 2.238175135197471e-06, "loss": 0.0406, "step": 7645 }, { "epoch": 2.09, "grad_norm": 1.2398709726568975, "learning_rate": 2.236946791374016e-06, "loss": 0.0407, "step": 7646 }, { "epoch": 2.09, "grad_norm": 1.7159626967387376, "learning_rate": 2.2357186875753333e-06, "loss": 0.0534, "step": 7647 }, { "epoch": 2.09, "grad_norm": 1.4948736675191263, "learning_rate": 2.2344908239081076e-06, "loss": 0.0442, "step": 7648 }, { "epoch": 2.09, "grad_norm": 1.5138630148447723, "learning_rate": 2.2332632004790007e-06, "loss": 0.0416, "step": 7649 }, { "epoch": 2.09, "grad_norm": 1.6280263184553732, "learning_rate": 2.2320358173946587e-06, "loss": 0.0481, "step": 7650 }, { "epoch": 2.09, "grad_norm": 1.575910483837429, "learning_rate": 2.230808674761701e-06, "loss": 0.0559, "step": 7651 }, { "epoch": 2.09, "grad_norm": 3.4842978247482534, "learning_rate": 2.2295817726867313e-06, "loss": 0.0536, "step": 7652 }, { "epoch": 2.09, "grad_norm": 1.514600048320214, "learning_rate": 2.2283551112763284e-06, "loss": 0.0483, "step": 7653 }, { "epoch": 2.09, "grad_norm": 1.8199166667140514, "learning_rate": 2.2271286906370504e-06, "loss": 0.0489, "step": 7654 }, { "epoch": 2.09, "grad_norm": 1.4253198151145172, "learning_rate": 2.2259025108754388e-06, "loss": 0.0453, "step": 7655 }, { "epoch": 2.09, "grad_norm": 1.2171111099162017, "learning_rate": 2.2246765720980074e-06, "loss": 0.0419, "step": 7656 }, { "epoch": 2.09, "grad_norm": 1.3174041181723641, "learning_rate": 2.2234508744112564e-06, "loss": 0.0424, "step": 7657 }, { "epoch": 2.09, "grad_norm": 1.4188081701404573, "learning_rate": 2.2222254179216602e-06, "loss": 0.0487, "step": 7658 }, { "epoch": 2.09, "grad_norm": 1.3457789391986241, "learning_rate": 2.2210002027356723e-06, "loss": 0.0432, "step": 7659 }, { "epoch": 2.09, "grad_norm": 1.686292024139762, "learning_rate": 2.219775228959726e-06, "loss": 0.0507, "step": 7660 }, { "epoch": 2.09, "grad_norm": 1.6161021225646754, "learning_rate": 2.218550496700237e-06, "loss": 0.0554, "step": 7661 }, { "epoch": 2.09, "grad_norm": 1.3862607310245512, "learning_rate": 2.2173260060635927e-06, "loss": 0.0401, "step": 7662 }, { "epoch": 2.09, "grad_norm": 1.667351863916119, "learning_rate": 2.216101757156169e-06, "loss": 0.0568, "step": 7663 }, { "epoch": 2.09, "grad_norm": 1.58665205752601, "learning_rate": 2.2148777500843125e-06, "loss": 0.0472, "step": 7664 }, { "epoch": 2.09, "grad_norm": 1.5140741642804514, "learning_rate": 2.2136539849543525e-06, "loss": 0.0489, "step": 7665 }, { "epoch": 2.09, "grad_norm": 1.520828894147409, "learning_rate": 2.2124304618725956e-06, "loss": 0.049, "step": 7666 }, { "epoch": 2.09, "grad_norm": 1.6303484004422177, "learning_rate": 2.2112071809453306e-06, "loss": 0.0514, "step": 7667 }, { "epoch": 2.09, "grad_norm": 1.4937625396977685, "learning_rate": 2.209984142278821e-06, "loss": 0.0487, "step": 7668 }, { "epoch": 2.09, "grad_norm": 1.8526561129813044, "learning_rate": 2.2087613459793143e-06, "loss": 0.0536, "step": 7669 }, { "epoch": 2.09, "grad_norm": 1.4570860009307929, "learning_rate": 2.2075387921530327e-06, "loss": 0.0486, "step": 7670 }, { "epoch": 2.09, "grad_norm": 1.6369724323436896, "learning_rate": 2.2063164809061783e-06, "loss": 0.0545, "step": 7671 }, { "epoch": 2.09, "grad_norm": 1.490502897413803, "learning_rate": 2.205094412344931e-06, "loss": 0.0448, "step": 7672 }, { "epoch": 2.09, "grad_norm": 1.4971969392969942, "learning_rate": 2.2038725865754543e-06, "loss": 0.0468, "step": 7673 }, { "epoch": 2.1, "grad_norm": 1.5807379655128104, "learning_rate": 2.202651003703885e-06, "loss": 0.0585, "step": 7674 }, { "epoch": 2.1, "grad_norm": 1.6362436732041103, "learning_rate": 2.2014296638363437e-06, "loss": 0.0409, "step": 7675 }, { "epoch": 2.1, "grad_norm": 1.4379636652063073, "learning_rate": 2.2002085670789257e-06, "loss": 0.0431, "step": 7676 }, { "epoch": 2.1, "grad_norm": 1.6130344090275623, "learning_rate": 2.198987713537708e-06, "loss": 0.0488, "step": 7677 }, { "epoch": 2.1, "grad_norm": 1.4453948439224231, "learning_rate": 2.1977671033187425e-06, "loss": 0.0474, "step": 7678 }, { "epoch": 2.1, "grad_norm": 1.8024263482546783, "learning_rate": 2.196546736528067e-06, "loss": 0.0453, "step": 7679 }, { "epoch": 2.1, "grad_norm": 1.5864496000804666, "learning_rate": 2.1953266132716903e-06, "loss": 0.0532, "step": 7680 }, { "epoch": 2.1, "grad_norm": 1.3410641497346425, "learning_rate": 2.1941067336556082e-06, "loss": 0.0393, "step": 7681 }, { "epoch": 2.1, "grad_norm": 2.039115095323855, "learning_rate": 2.1928870977857873e-06, "loss": 0.0525, "step": 7682 }, { "epoch": 2.1, "grad_norm": 1.7243659912974534, "learning_rate": 2.1916677057681786e-06, "loss": 0.0491, "step": 7683 }, { "epoch": 2.1, "grad_norm": 1.659263106049305, "learning_rate": 2.1904485577087066e-06, "loss": 0.0584, "step": 7684 }, { "epoch": 2.1, "grad_norm": 1.6009967663664904, "learning_rate": 2.1892296537132822e-06, "loss": 0.0481, "step": 7685 }, { "epoch": 2.1, "grad_norm": 1.6696131274065544, "learning_rate": 2.188010993887787e-06, "loss": 0.0539, "step": 7686 }, { "epoch": 2.1, "grad_norm": 1.444899124482223, "learning_rate": 2.1867925783380893e-06, "loss": 0.0453, "step": 7687 }, { "epoch": 2.1, "grad_norm": 1.8349081832420642, "learning_rate": 2.1855744071700303e-06, "loss": 0.0587, "step": 7688 }, { "epoch": 2.1, "grad_norm": 1.480256626876591, "learning_rate": 2.1843564804894316e-06, "loss": 0.0462, "step": 7689 }, { "epoch": 2.1, "grad_norm": 1.5649200161013583, "learning_rate": 2.183138798402092e-06, "loss": 0.0487, "step": 7690 }, { "epoch": 2.1, "grad_norm": 1.5257571402479257, "learning_rate": 2.181921361013794e-06, "loss": 0.0508, "step": 7691 }, { "epoch": 2.1, "grad_norm": 1.205779753780435, "learning_rate": 2.1807041684302928e-06, "loss": 0.0407, "step": 7692 }, { "epoch": 2.1, "grad_norm": 1.7846379075064627, "learning_rate": 2.1794872207573286e-06, "loss": 0.0546, "step": 7693 }, { "epoch": 2.1, "grad_norm": 1.5997319416493758, "learning_rate": 2.1782705181006148e-06, "loss": 0.0554, "step": 7694 }, { "epoch": 2.1, "grad_norm": 1.2957778245233489, "learning_rate": 2.177054060565845e-06, "loss": 0.0376, "step": 7695 }, { "epoch": 2.1, "grad_norm": 1.4009549130388859, "learning_rate": 2.1758378482586924e-06, "loss": 0.0413, "step": 7696 }, { "epoch": 2.1, "grad_norm": 1.5925360488429634, "learning_rate": 2.1746218812848097e-06, "loss": 0.0519, "step": 7697 }, { "epoch": 2.1, "grad_norm": 1.3701076520046696, "learning_rate": 2.1734061597498256e-06, "loss": 0.0499, "step": 7698 }, { "epoch": 2.1, "grad_norm": 1.5069849078690918, "learning_rate": 2.1721906837593514e-06, "loss": 0.0444, "step": 7699 }, { "epoch": 2.1, "grad_norm": 1.456057254387466, "learning_rate": 2.170975453418974e-06, "loss": 0.0511, "step": 7700 }, { "epoch": 2.1, "grad_norm": 1.4658315923399063, "learning_rate": 2.1697604688342594e-06, "loss": 0.0452, "step": 7701 }, { "epoch": 2.1, "grad_norm": 1.6552396112015717, "learning_rate": 2.1685457301107506e-06, "loss": 0.0471, "step": 7702 }, { "epoch": 2.1, "grad_norm": 1.3671635513037255, "learning_rate": 2.167331237353974e-06, "loss": 0.0441, "step": 7703 }, { "epoch": 2.1, "grad_norm": 1.4439055345538996, "learning_rate": 2.16611699066943e-06, "loss": 0.0424, "step": 7704 }, { "epoch": 2.1, "grad_norm": 1.7250617868294074, "learning_rate": 2.164902990162602e-06, "loss": 0.0534, "step": 7705 }, { "epoch": 2.1, "grad_norm": 1.7564844538457465, "learning_rate": 2.1636892359389476e-06, "loss": 0.0545, "step": 7706 }, { "epoch": 2.1, "grad_norm": 2.1110422181675603, "learning_rate": 2.1624757281039056e-06, "loss": 0.0599, "step": 7707 }, { "epoch": 2.1, "grad_norm": 1.7453026628414732, "learning_rate": 2.16126246676289e-06, "loss": 0.0547, "step": 7708 }, { "epoch": 2.1, "grad_norm": 1.515852528891318, "learning_rate": 2.1600494520213006e-06, "loss": 0.0482, "step": 7709 }, { "epoch": 2.1, "grad_norm": 1.624721070154593, "learning_rate": 2.158836683984507e-06, "loss": 0.0542, "step": 7710 }, { "epoch": 2.11, "grad_norm": 1.4601312872432688, "learning_rate": 2.1576241627578654e-06, "loss": 0.0398, "step": 7711 }, { "epoch": 2.11, "grad_norm": 1.5589654476108326, "learning_rate": 2.156411888446705e-06, "loss": 0.0466, "step": 7712 }, { "epoch": 2.11, "grad_norm": 1.5921808041888277, "learning_rate": 2.1551998611563355e-06, "loss": 0.0663, "step": 7713 }, { "epoch": 2.11, "grad_norm": 1.5978527651273398, "learning_rate": 2.1539880809920433e-06, "loss": 0.0483, "step": 7714 }, { "epoch": 2.11, "grad_norm": 1.4437560235513083, "learning_rate": 2.152776548059098e-06, "loss": 0.0471, "step": 7715 }, { "epoch": 2.11, "grad_norm": 1.70637716803343, "learning_rate": 2.151565262462742e-06, "loss": 0.0627, "step": 7716 }, { "epoch": 2.11, "grad_norm": 1.2347661224502224, "learning_rate": 2.1503542243082016e-06, "loss": 0.0391, "step": 7717 }, { "epoch": 2.11, "grad_norm": 1.5886820747654424, "learning_rate": 2.1491434337006777e-06, "loss": 0.0464, "step": 7718 }, { "epoch": 2.11, "grad_norm": 1.515478311199961, "learning_rate": 2.147932890745351e-06, "loss": 0.0574, "step": 7719 }, { "epoch": 2.11, "grad_norm": 1.3630286522709556, "learning_rate": 2.1467225955473786e-06, "loss": 0.0424, "step": 7720 }, { "epoch": 2.11, "grad_norm": 1.729905560066306, "learning_rate": 2.145512548211902e-06, "loss": 0.0517, "step": 7721 }, { "epoch": 2.11, "grad_norm": 1.671694412506426, "learning_rate": 2.1443027488440338e-06, "loss": 0.0478, "step": 7722 }, { "epoch": 2.11, "grad_norm": 1.4671785264596322, "learning_rate": 2.1430931975488715e-06, "loss": 0.0528, "step": 7723 }, { "epoch": 2.11, "grad_norm": 1.3996389864197183, "learning_rate": 2.1418838944314866e-06, "loss": 0.046, "step": 7724 }, { "epoch": 2.11, "grad_norm": 1.3042824222804872, "learning_rate": 2.140674839596931e-06, "loss": 0.0403, "step": 7725 }, { "epoch": 2.11, "grad_norm": 1.9163178994240455, "learning_rate": 2.1394660331502322e-06, "loss": 0.0585, "step": 7726 }, { "epoch": 2.11, "grad_norm": 1.5980340307750944, "learning_rate": 2.138257475196402e-06, "loss": 0.0496, "step": 7727 }, { "epoch": 2.11, "grad_norm": 1.3834564319937144, "learning_rate": 2.1370491658404235e-06, "loss": 0.0441, "step": 7728 }, { "epoch": 2.11, "grad_norm": 1.311981403660786, "learning_rate": 2.135841105187266e-06, "loss": 0.0476, "step": 7729 }, { "epoch": 2.11, "grad_norm": 1.4578929951399202, "learning_rate": 2.134633293341871e-06, "loss": 0.0453, "step": 7730 }, { "epoch": 2.11, "grad_norm": 1.7951757284219738, "learning_rate": 2.1334257304091603e-06, "loss": 0.0566, "step": 7731 }, { "epoch": 2.11, "grad_norm": 1.4829455331152133, "learning_rate": 2.1322184164940324e-06, "loss": 0.0489, "step": 7732 }, { "epoch": 2.11, "grad_norm": 1.5165166913305823, "learning_rate": 2.1310113517013693e-06, "loss": 0.0489, "step": 7733 }, { "epoch": 2.11, "grad_norm": 1.7203524176482812, "learning_rate": 2.129804536136025e-06, "loss": 0.054, "step": 7734 }, { "epoch": 2.11, "grad_norm": 1.5739682140469393, "learning_rate": 2.1285979699028376e-06, "loss": 0.05, "step": 7735 }, { "epoch": 2.11, "grad_norm": 1.5223370492182904, "learning_rate": 2.1273916531066193e-06, "loss": 0.0528, "step": 7736 }, { "epoch": 2.11, "grad_norm": 1.3697488144628571, "learning_rate": 2.126185585852162e-06, "loss": 0.0454, "step": 7737 }, { "epoch": 2.11, "grad_norm": 1.4414655254685338, "learning_rate": 2.1249797682442346e-06, "loss": 0.0409, "step": 7738 }, { "epoch": 2.11, "grad_norm": 1.4609203083422198, "learning_rate": 2.1237742003875895e-06, "loss": 0.0462, "step": 7739 }, { "epoch": 2.11, "grad_norm": 1.5035723275710244, "learning_rate": 2.1225688823869494e-06, "loss": 0.0499, "step": 7740 }, { "epoch": 2.11, "grad_norm": 1.3718125797347978, "learning_rate": 2.1213638143470234e-06, "loss": 0.0455, "step": 7741 }, { "epoch": 2.11, "grad_norm": 1.616421031835314, "learning_rate": 2.1201589963724933e-06, "loss": 0.0585, "step": 7742 }, { "epoch": 2.11, "grad_norm": 1.4159906043808597, "learning_rate": 2.1189544285680214e-06, "loss": 0.0477, "step": 7743 }, { "epoch": 2.11, "grad_norm": 1.6069130153010018, "learning_rate": 2.1177501110382455e-06, "loss": 0.0506, "step": 7744 }, { "epoch": 2.11, "grad_norm": 1.766691361378118, "learning_rate": 2.1165460438877856e-06, "loss": 0.0549, "step": 7745 }, { "epoch": 2.11, "grad_norm": 1.5530794788415467, "learning_rate": 2.1153422272212398e-06, "loss": 0.0506, "step": 7746 }, { "epoch": 2.11, "grad_norm": 1.6474714942926358, "learning_rate": 2.1141386611431818e-06, "loss": 0.0469, "step": 7747 }, { "epoch": 2.12, "grad_norm": 1.5125926852891596, "learning_rate": 2.1129353457581647e-06, "loss": 0.0472, "step": 7748 }, { "epoch": 2.12, "grad_norm": 1.4729865465977838, "learning_rate": 2.111732281170718e-06, "loss": 0.0512, "step": 7749 }, { "epoch": 2.12, "grad_norm": 1.5229022782093555, "learning_rate": 2.1105294674853543e-06, "loss": 0.0504, "step": 7750 }, { "epoch": 2.12, "grad_norm": 1.8257730028337207, "learning_rate": 2.109326904806558e-06, "loss": 0.0659, "step": 7751 }, { "epoch": 2.12, "grad_norm": 1.5871578368332793, "learning_rate": 2.108124593238798e-06, "loss": 0.0551, "step": 7752 }, { "epoch": 2.12, "grad_norm": 1.4547307346475309, "learning_rate": 2.106922532886517e-06, "loss": 0.0498, "step": 7753 }, { "epoch": 2.12, "grad_norm": 1.6077294998445286, "learning_rate": 2.105720723854138e-06, "loss": 0.0493, "step": 7754 }, { "epoch": 2.12, "grad_norm": 1.3829068844835544, "learning_rate": 2.104519166246059e-06, "loss": 0.0435, "step": 7755 }, { "epoch": 2.12, "grad_norm": 1.5420311823340107, "learning_rate": 2.103317860166662e-06, "loss": 0.045, "step": 7756 }, { "epoch": 2.12, "grad_norm": 1.5249571409645588, "learning_rate": 2.1021168057203008e-06, "loss": 0.0517, "step": 7757 }, { "epoch": 2.12, "grad_norm": 1.9595829931747413, "learning_rate": 2.1009160030113128e-06, "loss": 0.0501, "step": 7758 }, { "epoch": 2.12, "grad_norm": 1.7430004946930417, "learning_rate": 2.09971545214401e-06, "loss": 0.0608, "step": 7759 }, { "epoch": 2.12, "grad_norm": 1.515632271760031, "learning_rate": 2.0985151532226834e-06, "loss": 0.0483, "step": 7760 }, { "epoch": 2.12, "grad_norm": 1.4754426486601246, "learning_rate": 2.0973151063516e-06, "loss": 0.0557, "step": 7761 }, { "epoch": 2.12, "grad_norm": 1.264308021842779, "learning_rate": 2.096115311635011e-06, "loss": 0.0413, "step": 7762 }, { "epoch": 2.12, "grad_norm": 1.3739104782719076, "learning_rate": 2.0949157691771395e-06, "loss": 0.0422, "step": 7763 }, { "epoch": 2.12, "grad_norm": 1.9306220316413207, "learning_rate": 2.0937164790821907e-06, "loss": 0.0548, "step": 7764 }, { "epoch": 2.12, "grad_norm": 1.354528985374528, "learning_rate": 2.0925174414543454e-06, "loss": 0.0413, "step": 7765 }, { "epoch": 2.12, "grad_norm": 1.4123816436259278, "learning_rate": 2.0913186563977634e-06, "loss": 0.0436, "step": 7766 }, { "epoch": 2.12, "grad_norm": 1.537961081431161, "learning_rate": 2.0901201240165797e-06, "loss": 0.0511, "step": 7767 }, { "epoch": 2.12, "grad_norm": 1.4646898787400453, "learning_rate": 2.0889218444149145e-06, "loss": 0.0454, "step": 7768 }, { "epoch": 2.12, "grad_norm": 1.6327447718797505, "learning_rate": 2.0877238176968585e-06, "loss": 0.0502, "step": 7769 }, { "epoch": 2.12, "grad_norm": 1.356354250775867, "learning_rate": 2.0865260439664857e-06, "loss": 0.0448, "step": 7770 }, { "epoch": 2.12, "grad_norm": 1.5118410978571668, "learning_rate": 2.0853285233278454e-06, "loss": 0.0548, "step": 7771 }, { "epoch": 2.12, "grad_norm": 1.5133616736592606, "learning_rate": 2.0841312558849653e-06, "loss": 0.0476, "step": 7772 }, { "epoch": 2.12, "grad_norm": 1.389973799756541, "learning_rate": 2.0829342417418493e-06, "loss": 0.0506, "step": 7773 }, { "epoch": 2.12, "grad_norm": 1.3037531408099183, "learning_rate": 2.081737481002484e-06, "loss": 0.0408, "step": 7774 }, { "epoch": 2.12, "grad_norm": 1.6352657510066801, "learning_rate": 2.0805409737708297e-06, "loss": 0.0543, "step": 7775 }, { "epoch": 2.12, "grad_norm": 1.4295526286713385, "learning_rate": 2.0793447201508288e-06, "loss": 0.0486, "step": 7776 }, { "epoch": 2.12, "grad_norm": 1.5172547434495156, "learning_rate": 2.078148720246397e-06, "loss": 0.0445, "step": 7777 }, { "epoch": 2.12, "grad_norm": 1.4740608359245537, "learning_rate": 2.0769529741614297e-06, "loss": 0.05, "step": 7778 }, { "epoch": 2.12, "grad_norm": 1.6067616543549852, "learning_rate": 2.0757574819998e-06, "loss": 0.0527, "step": 7779 }, { "epoch": 2.12, "grad_norm": 1.5781559215022924, "learning_rate": 2.0745622438653627e-06, "loss": 0.052, "step": 7780 }, { "epoch": 2.12, "grad_norm": 1.559826462112527, "learning_rate": 2.0733672598619444e-06, "loss": 0.0517, "step": 7781 }, { "epoch": 2.12, "grad_norm": 1.3888181006178777, "learning_rate": 2.0721725300933552e-06, "loss": 0.0468, "step": 7782 }, { "epoch": 2.12, "grad_norm": 1.5638577420089863, "learning_rate": 2.070978054663379e-06, "loss": 0.0476, "step": 7783 }, { "epoch": 2.13, "grad_norm": 1.6050613637259945, "learning_rate": 2.0697838336757796e-06, "loss": 0.0576, "step": 7784 }, { "epoch": 2.13, "grad_norm": 1.4089142071152132, "learning_rate": 2.0685898672342967e-06, "loss": 0.0456, "step": 7785 }, { "epoch": 2.13, "grad_norm": 1.5300530579668767, "learning_rate": 2.067396155442652e-06, "loss": 0.0612, "step": 7786 }, { "epoch": 2.13, "grad_norm": 1.6484293749905639, "learning_rate": 2.0662026984045396e-06, "loss": 0.0563, "step": 7787 }, { "epoch": 2.13, "grad_norm": 1.4691297601126798, "learning_rate": 2.065009496223638e-06, "loss": 0.0491, "step": 7788 }, { "epoch": 2.13, "grad_norm": 1.6824805799177214, "learning_rate": 2.063816549003599e-06, "loss": 0.0519, "step": 7789 }, { "epoch": 2.13, "grad_norm": 1.5403406514618005, "learning_rate": 2.062623856848051e-06, "loss": 0.0449, "step": 7790 }, { "epoch": 2.13, "grad_norm": 1.4618512018526246, "learning_rate": 2.061431419860603e-06, "loss": 0.0508, "step": 7791 }, { "epoch": 2.13, "grad_norm": 1.5768809500425693, "learning_rate": 2.0602392381448427e-06, "loss": 0.053, "step": 7792 }, { "epoch": 2.13, "grad_norm": 1.5707215816601925, "learning_rate": 2.0590473118043326e-06, "loss": 0.0544, "step": 7793 }, { "epoch": 2.13, "grad_norm": 1.626813443045718, "learning_rate": 2.057855640942617e-06, "loss": 0.0482, "step": 7794 }, { "epoch": 2.13, "grad_norm": 1.4206355601913683, "learning_rate": 2.056664225663214e-06, "loss": 0.0477, "step": 7795 }, { "epoch": 2.13, "grad_norm": 1.5760286190845691, "learning_rate": 2.0554730660696214e-06, "loss": 0.0498, "step": 7796 }, { "epoch": 2.13, "grad_norm": 1.3561025048514879, "learning_rate": 2.054282162265313e-06, "loss": 0.0434, "step": 7797 }, { "epoch": 2.13, "grad_norm": 1.3167705952037672, "learning_rate": 2.053091514353745e-06, "loss": 0.0443, "step": 7798 }, { "epoch": 2.13, "grad_norm": 1.4423705057792942, "learning_rate": 2.051901122438345e-06, "loss": 0.0478, "step": 7799 }, { "epoch": 2.13, "grad_norm": 1.8799137959968626, "learning_rate": 2.0507109866225243e-06, "loss": 0.0497, "step": 7800 }, { "epoch": 2.13, "grad_norm": 1.750496124531781, "learning_rate": 2.049521107009669e-06, "loss": 0.0491, "step": 7801 }, { "epoch": 2.13, "grad_norm": 1.4535862961989705, "learning_rate": 2.048331483703142e-06, "loss": 0.0478, "step": 7802 }, { "epoch": 2.13, "grad_norm": 1.7139688425136053, "learning_rate": 2.0471421168062845e-06, "loss": 0.0399, "step": 7803 }, { "epoch": 2.13, "grad_norm": 1.6477892250932662, "learning_rate": 2.0459530064224183e-06, "loss": 0.0505, "step": 7804 }, { "epoch": 2.13, "grad_norm": 1.7039773118983894, "learning_rate": 2.0447641526548377e-06, "loss": 0.0531, "step": 7805 }, { "epoch": 2.13, "grad_norm": 1.449287930773047, "learning_rate": 2.043575555606822e-06, "loss": 0.0423, "step": 7806 }, { "epoch": 2.13, "grad_norm": 1.494215798853032, "learning_rate": 2.042387215381621e-06, "loss": 0.0455, "step": 7807 }, { "epoch": 2.13, "grad_norm": 1.5124702792621303, "learning_rate": 2.0411991320824657e-06, "loss": 0.0559, "step": 7808 }, { "epoch": 2.13, "grad_norm": 1.4620068170302547, "learning_rate": 2.040011305812563e-06, "loss": 0.0425, "step": 7809 }, { "epoch": 2.13, "grad_norm": 1.6498792186300844, "learning_rate": 2.0388237366751005e-06, "loss": 0.0449, "step": 7810 }, { "epoch": 2.13, "grad_norm": 1.9403859103290324, "learning_rate": 2.03763642477324e-06, "loss": 0.0573, "step": 7811 }, { "epoch": 2.13, "grad_norm": 1.5908742165498022, "learning_rate": 2.036449370210125e-06, "loss": 0.046, "step": 7812 }, { "epoch": 2.13, "grad_norm": 1.7260610038266784, "learning_rate": 2.0352625730888727e-06, "loss": 0.0478, "step": 7813 }, { "epoch": 2.13, "grad_norm": 1.5962290717880836, "learning_rate": 2.0340760335125794e-06, "loss": 0.0551, "step": 7814 }, { "epoch": 2.13, "grad_norm": 1.4312057243207117, "learning_rate": 2.032889751584317e-06, "loss": 0.0496, "step": 7815 }, { "epoch": 2.13, "grad_norm": 1.4944692181821482, "learning_rate": 2.0317037274071412e-06, "loss": 0.0429, "step": 7816 }, { "epoch": 2.13, "grad_norm": 1.5121052407610622, "learning_rate": 2.0305179610840775e-06, "loss": 0.0514, "step": 7817 }, { "epoch": 2.13, "grad_norm": 1.7338597665570605, "learning_rate": 2.0293324527181363e-06, "loss": 0.0538, "step": 7818 }, { "epoch": 2.13, "grad_norm": 1.7575409884749362, "learning_rate": 2.0281472024122992e-06, "loss": 0.058, "step": 7819 }, { "epoch": 2.13, "grad_norm": 1.2665488462710315, "learning_rate": 2.0269622102695303e-06, "loss": 0.043, "step": 7820 }, { "epoch": 2.14, "grad_norm": 1.5591025757795025, "learning_rate": 2.0257774763927656e-06, "loss": 0.0468, "step": 7821 }, { "epoch": 2.14, "grad_norm": 1.1625389945961262, "learning_rate": 2.0245930008849267e-06, "loss": 0.0368, "step": 7822 }, { "epoch": 2.14, "grad_norm": 1.5968746556318127, "learning_rate": 2.0234087838489042e-06, "loss": 0.0502, "step": 7823 }, { "epoch": 2.14, "grad_norm": 1.5542149784664891, "learning_rate": 2.0222248253875735e-06, "loss": 0.0567, "step": 7824 }, { "epoch": 2.14, "grad_norm": 1.3231833573843619, "learning_rate": 2.0210411256037844e-06, "loss": 0.0402, "step": 7825 }, { "epoch": 2.14, "grad_norm": 1.3403769738104137, "learning_rate": 2.019857684600362e-06, "loss": 0.0442, "step": 7826 }, { "epoch": 2.14, "grad_norm": 1.4480927657289338, "learning_rate": 2.01867450248011e-06, "loss": 0.0516, "step": 7827 }, { "epoch": 2.14, "grad_norm": 1.563754124969961, "learning_rate": 2.0174915793458154e-06, "loss": 0.0564, "step": 7828 }, { "epoch": 2.14, "grad_norm": 1.355365020456432, "learning_rate": 2.016308915300233e-06, "loss": 0.0391, "step": 7829 }, { "epoch": 2.14, "grad_norm": 1.8248091909783004, "learning_rate": 2.015126510446104e-06, "loss": 0.0585, "step": 7830 }, { "epoch": 2.14, "grad_norm": 1.7451387463920665, "learning_rate": 2.013944364886143e-06, "loss": 0.0512, "step": 7831 }, { "epoch": 2.14, "grad_norm": 1.3700698483102016, "learning_rate": 2.0127624787230397e-06, "loss": 0.046, "step": 7832 }, { "epoch": 2.14, "grad_norm": 1.4535732672335389, "learning_rate": 2.0115808520594638e-06, "loss": 0.0434, "step": 7833 }, { "epoch": 2.14, "grad_norm": 1.4497472982744464, "learning_rate": 2.010399484998065e-06, "loss": 0.0564, "step": 7834 }, { "epoch": 2.14, "grad_norm": 1.7040216777047015, "learning_rate": 2.009218377641466e-06, "loss": 0.0587, "step": 7835 }, { "epoch": 2.14, "grad_norm": 1.5892149292824322, "learning_rate": 2.0080375300922703e-06, "loss": 0.0493, "step": 7836 }, { "epoch": 2.14, "grad_norm": 1.421009217921668, "learning_rate": 2.0068569424530577e-06, "loss": 0.0442, "step": 7837 }, { "epoch": 2.14, "grad_norm": 1.694277776676992, "learning_rate": 2.0056766148263825e-06, "loss": 0.0524, "step": 7838 }, { "epoch": 2.14, "grad_norm": 1.6856894760815495, "learning_rate": 2.0044965473147815e-06, "loss": 0.0577, "step": 7839 }, { "epoch": 2.14, "grad_norm": 1.3632547154858765, "learning_rate": 2.0033167400207647e-06, "loss": 0.0422, "step": 7840 }, { "epoch": 2.14, "grad_norm": 1.3626975326963566, "learning_rate": 2.0021371930468235e-06, "loss": 0.0404, "step": 7841 }, { "epoch": 2.14, "grad_norm": 1.5505909845709056, "learning_rate": 2.0009579064954236e-06, "loss": 0.0457, "step": 7842 }, { "epoch": 2.14, "grad_norm": 1.3973529997189955, "learning_rate": 1.999778880469009e-06, "loss": 0.0531, "step": 7843 }, { "epoch": 2.14, "grad_norm": 1.5258542477377819, "learning_rate": 1.998600115069998e-06, "loss": 0.0412, "step": 7844 }, { "epoch": 2.14, "grad_norm": 1.4302891056326903, "learning_rate": 1.997421610400793e-06, "loss": 0.045, "step": 7845 }, { "epoch": 2.14, "grad_norm": 1.5669878738284893, "learning_rate": 1.996243366563768e-06, "loss": 0.0493, "step": 7846 }, { "epoch": 2.14, "grad_norm": 1.3760944038427028, "learning_rate": 1.9950653836612783e-06, "loss": 0.046, "step": 7847 }, { "epoch": 2.14, "grad_norm": 1.6146863147395114, "learning_rate": 1.9938876617956533e-06, "loss": 0.0548, "step": 7848 }, { "epoch": 2.14, "grad_norm": 1.8327212882209223, "learning_rate": 1.9927102010692014e-06, "loss": 0.0511, "step": 7849 }, { "epoch": 2.14, "grad_norm": 1.651219090180509, "learning_rate": 1.9915330015842055e-06, "loss": 0.0464, "step": 7850 }, { "epoch": 2.14, "grad_norm": 1.6362518885381698, "learning_rate": 1.990356063442932e-06, "loss": 0.0516, "step": 7851 }, { "epoch": 2.14, "grad_norm": 1.454400970406789, "learning_rate": 1.989179386747617e-06, "loss": 0.0412, "step": 7852 }, { "epoch": 2.14, "grad_norm": 1.3803465715619918, "learning_rate": 1.9880029716004817e-06, "loss": 0.0452, "step": 7853 }, { "epoch": 2.14, "grad_norm": 1.442993687750572, "learning_rate": 1.9868268181037186e-06, "loss": 0.0508, "step": 7854 }, { "epoch": 2.14, "grad_norm": 1.3319003427694345, "learning_rate": 1.9856509263595e-06, "loss": 0.0396, "step": 7855 }, { "epoch": 2.14, "grad_norm": 1.5554488904825834, "learning_rate": 1.984475296469972e-06, "loss": 0.0465, "step": 7856 }, { "epoch": 2.14, "grad_norm": 1.4927437563339783, "learning_rate": 1.9832999285372653e-06, "loss": 0.0488, "step": 7857 }, { "epoch": 2.15, "grad_norm": 1.265028351196914, "learning_rate": 1.9821248226634793e-06, "loss": 0.036, "step": 7858 }, { "epoch": 2.15, "grad_norm": 1.4579311169951723, "learning_rate": 1.9809499789506985e-06, "loss": 0.0538, "step": 7859 }, { "epoch": 2.15, "grad_norm": 1.4340642524253164, "learning_rate": 1.9797753975009794e-06, "loss": 0.0401, "step": 7860 }, { "epoch": 2.15, "grad_norm": 1.6209045204191102, "learning_rate": 1.978601078416357e-06, "loss": 0.044, "step": 7861 }, { "epoch": 2.15, "grad_norm": 1.6675981489799814, "learning_rate": 1.977427021798841e-06, "loss": 0.0567, "step": 7862 }, { "epoch": 2.15, "grad_norm": 1.5695406567711878, "learning_rate": 1.9762532277504266e-06, "loss": 0.041, "step": 7863 }, { "epoch": 2.15, "grad_norm": 1.4371224960782363, "learning_rate": 1.9750796963730752e-06, "loss": 0.0429, "step": 7864 }, { "epoch": 2.15, "grad_norm": 1.4728631341312466, "learning_rate": 1.973906427768735e-06, "loss": 0.041, "step": 7865 }, { "epoch": 2.15, "grad_norm": 1.5418999644244622, "learning_rate": 1.9727334220393253e-06, "loss": 0.0498, "step": 7866 }, { "epoch": 2.15, "grad_norm": 1.5038065838690255, "learning_rate": 1.971560679286744e-06, "loss": 0.0522, "step": 7867 }, { "epoch": 2.15, "grad_norm": 1.8844241518084501, "learning_rate": 1.970388199612866e-06, "loss": 0.0486, "step": 7868 }, { "epoch": 2.15, "grad_norm": 1.5750638815612796, "learning_rate": 1.969215983119546e-06, "loss": 0.0472, "step": 7869 }, { "epoch": 2.15, "grad_norm": 1.6603770252210994, "learning_rate": 1.9680440299086114e-06, "loss": 0.0495, "step": 7870 }, { "epoch": 2.15, "grad_norm": 1.5777041981175985, "learning_rate": 1.966872340081872e-06, "loss": 0.0526, "step": 7871 }, { "epoch": 2.15, "grad_norm": 1.4258247151912036, "learning_rate": 1.9657009137411097e-06, "loss": 0.051, "step": 7872 }, { "epoch": 2.15, "grad_norm": 1.8414140369359633, "learning_rate": 1.964529750988086e-06, "loss": 0.0546, "step": 7873 }, { "epoch": 2.15, "grad_norm": 1.7254335481130056, "learning_rate": 1.9633588519245378e-06, "loss": 0.0588, "step": 7874 }, { "epoch": 2.15, "grad_norm": 1.735786107561333, "learning_rate": 1.962188216652183e-06, "loss": 0.0534, "step": 7875 }, { "epoch": 2.15, "grad_norm": 1.6700067904609694, "learning_rate": 1.961017845272711e-06, "loss": 0.0484, "step": 7876 }, { "epoch": 2.15, "grad_norm": 1.5637940603191256, "learning_rate": 1.9598477378877944e-06, "loss": 0.05, "step": 7877 }, { "epoch": 2.15, "grad_norm": 1.6698178826815329, "learning_rate": 1.9586778945990785e-06, "loss": 0.0495, "step": 7878 }, { "epoch": 2.15, "grad_norm": 1.4925687346383951, "learning_rate": 1.957508315508187e-06, "loss": 0.0487, "step": 7879 }, { "epoch": 2.15, "grad_norm": 1.6600549662376056, "learning_rate": 1.956339000716718e-06, "loss": 0.0506, "step": 7880 }, { "epoch": 2.15, "grad_norm": 1.4897150270189374, "learning_rate": 1.9551699503262534e-06, "loss": 0.0538, "step": 7881 }, { "epoch": 2.15, "grad_norm": 1.5569128098914728, "learning_rate": 1.954001164438344e-06, "loss": 0.0438, "step": 7882 }, { "epoch": 2.15, "grad_norm": 1.5739700993582202, "learning_rate": 1.9528326431545248e-06, "loss": 0.0531, "step": 7883 }, { "epoch": 2.15, "grad_norm": 1.5970191869259367, "learning_rate": 1.951664386576303e-06, "loss": 0.0464, "step": 7884 }, { "epoch": 2.15, "grad_norm": 1.604324173765507, "learning_rate": 1.9504963948051646e-06, "loss": 0.0565, "step": 7885 }, { "epoch": 2.15, "grad_norm": 1.663927712548092, "learning_rate": 1.949328667942571e-06, "loss": 0.0464, "step": 7886 }, { "epoch": 2.15, "grad_norm": 1.4923735181733429, "learning_rate": 1.9481612060899646e-06, "loss": 0.0425, "step": 7887 }, { "epoch": 2.15, "grad_norm": 1.5539395918729548, "learning_rate": 1.946994009348759e-06, "loss": 0.0461, "step": 7888 }, { "epoch": 2.15, "grad_norm": 1.3165604951095617, "learning_rate": 1.945827077820351e-06, "loss": 0.0441, "step": 7889 }, { "epoch": 2.15, "grad_norm": 1.628446485530381, "learning_rate": 1.9446604116061095e-06, "loss": 0.045, "step": 7890 }, { "epoch": 2.15, "grad_norm": 1.5397673096694424, "learning_rate": 1.943494010807383e-06, "loss": 0.0583, "step": 7891 }, { "epoch": 2.15, "grad_norm": 1.5181087651565, "learning_rate": 1.9423278755254933e-06, "loss": 0.0466, "step": 7892 }, { "epoch": 2.15, "grad_norm": 1.5163892077282617, "learning_rate": 1.9411620058617458e-06, "loss": 0.0508, "step": 7893 }, { "epoch": 2.16, "grad_norm": 1.4829450339473196, "learning_rate": 1.939996401917415e-06, "loss": 0.048, "step": 7894 }, { "epoch": 2.16, "grad_norm": 1.5366090046248948, "learning_rate": 1.9388310637937606e-06, "loss": 0.0557, "step": 7895 }, { "epoch": 2.16, "grad_norm": 1.513697605954505, "learning_rate": 1.937665991592012e-06, "loss": 0.0558, "step": 7896 }, { "epoch": 2.16, "grad_norm": 1.396043539619283, "learning_rate": 1.936501185413379e-06, "loss": 0.0453, "step": 7897 }, { "epoch": 2.16, "grad_norm": 1.2863188736265565, "learning_rate": 1.935336645359046e-06, "loss": 0.0448, "step": 7898 }, { "epoch": 2.16, "grad_norm": 1.5870620095832648, "learning_rate": 1.9341723715301786e-06, "loss": 0.0498, "step": 7899 }, { "epoch": 2.16, "grad_norm": 1.399188406110023, "learning_rate": 1.933008364027914e-06, "loss": 0.0456, "step": 7900 }, { "epoch": 2.16, "grad_norm": 1.5052417179934954, "learning_rate": 1.9318446229533717e-06, "loss": 0.0476, "step": 7901 }, { "epoch": 2.16, "grad_norm": 1.3205901716175221, "learning_rate": 1.9306811484076433e-06, "loss": 0.04, "step": 7902 }, { "epoch": 2.16, "grad_norm": 1.509832493319649, "learning_rate": 1.9295179404918e-06, "loss": 0.0501, "step": 7903 }, { "epoch": 2.16, "grad_norm": 1.4907555968252728, "learning_rate": 1.9283549993068863e-06, "loss": 0.0449, "step": 7904 }, { "epoch": 2.16, "grad_norm": 1.3975844515662, "learning_rate": 1.92719232495393e-06, "loss": 0.0405, "step": 7905 }, { "epoch": 2.16, "grad_norm": 1.5680883691912584, "learning_rate": 1.9260299175339288e-06, "loss": 0.0536, "step": 7906 }, { "epoch": 2.16, "grad_norm": 1.4729180938036717, "learning_rate": 1.924867777147863e-06, "loss": 0.0412, "step": 7907 }, { "epoch": 2.16, "grad_norm": 1.430743135315728, "learning_rate": 1.9237059038966867e-06, "loss": 0.0479, "step": 7908 }, { "epoch": 2.16, "grad_norm": 1.3856068120074836, "learning_rate": 1.9225442978813296e-06, "loss": 0.046, "step": 7909 }, { "epoch": 2.16, "grad_norm": 1.2903083566513454, "learning_rate": 1.921382959202699e-06, "loss": 0.0405, "step": 7910 }, { "epoch": 2.16, "grad_norm": 1.559217148370068, "learning_rate": 1.9202218879616824e-06, "loss": 0.0532, "step": 7911 }, { "epoch": 2.16, "grad_norm": 1.7755910772841519, "learning_rate": 1.9190610842591386e-06, "loss": 0.0508, "step": 7912 }, { "epoch": 2.16, "grad_norm": 1.8211453612759008, "learning_rate": 1.917900548195909e-06, "loss": 0.0543, "step": 7913 }, { "epoch": 2.16, "grad_norm": 1.5043877453022898, "learning_rate": 1.9167402798728068e-06, "loss": 0.0496, "step": 7914 }, { "epoch": 2.16, "grad_norm": 1.762457313373451, "learning_rate": 1.915580279390624e-06, "loss": 0.0465, "step": 7915 }, { "epoch": 2.16, "grad_norm": 1.4527783345581387, "learning_rate": 1.914420546850128e-06, "loss": 0.0433, "step": 7916 }, { "epoch": 2.16, "grad_norm": 1.36241044729645, "learning_rate": 1.9132610823520663e-06, "loss": 0.0464, "step": 7917 }, { "epoch": 2.16, "grad_norm": 1.4772735313052772, "learning_rate": 1.9121018859971584e-06, "loss": 0.0498, "step": 7918 }, { "epoch": 2.16, "grad_norm": 1.616506194829487, "learning_rate": 1.9109429578861066e-06, "loss": 0.0553, "step": 7919 }, { "epoch": 2.16, "grad_norm": 1.6082722954537647, "learning_rate": 1.9097842981195836e-06, "loss": 0.0412, "step": 7920 }, { "epoch": 2.16, "grad_norm": 1.5191249937291298, "learning_rate": 1.908625906798242e-06, "loss": 0.0511, "step": 7921 }, { "epoch": 2.16, "grad_norm": 1.4618685177366575, "learning_rate": 1.907467784022709e-06, "loss": 0.0493, "step": 7922 }, { "epoch": 2.16, "grad_norm": 1.4637727809802143, "learning_rate": 1.9063099298935933e-06, "loss": 0.0496, "step": 7923 }, { "epoch": 2.16, "grad_norm": 1.479710842556938, "learning_rate": 1.9051523445114733e-06, "loss": 0.0481, "step": 7924 }, { "epoch": 2.16, "grad_norm": 1.4820086439070792, "learning_rate": 1.9039950279769114e-06, "loss": 0.0468, "step": 7925 }, { "epoch": 2.16, "grad_norm": 1.3682367134027762, "learning_rate": 1.9028379803904417e-06, "loss": 0.0422, "step": 7926 }, { "epoch": 2.16, "grad_norm": 1.4440082212492422, "learning_rate": 1.9016812018525753e-06, "loss": 0.0556, "step": 7927 }, { "epoch": 2.16, "grad_norm": 1.2781817672722595, "learning_rate": 1.9005246924638e-06, "loss": 0.0371, "step": 7928 }, { "epoch": 2.16, "grad_norm": 1.7713988279791533, "learning_rate": 1.8993684523245842e-06, "loss": 0.0564, "step": 7929 }, { "epoch": 2.16, "grad_norm": 1.467250007441262, "learning_rate": 1.8982124815353665e-06, "loss": 0.0493, "step": 7930 }, { "epoch": 2.17, "grad_norm": 1.7129036898423464, "learning_rate": 1.8970567801965683e-06, "loss": 0.048, "step": 7931 }, { "epoch": 2.17, "grad_norm": 1.5738389476471533, "learning_rate": 1.8959013484085836e-06, "loss": 0.0557, "step": 7932 }, { "epoch": 2.17, "grad_norm": 1.9010371420348786, "learning_rate": 1.894746186271782e-06, "loss": 0.0663, "step": 7933 }, { "epoch": 2.17, "grad_norm": 1.4948607626610058, "learning_rate": 1.8935912938865147e-06, "loss": 0.042, "step": 7934 }, { "epoch": 2.17, "grad_norm": 1.4713419730197863, "learning_rate": 1.8924366713531045e-06, "loss": 0.0412, "step": 7935 }, { "epoch": 2.17, "grad_norm": 1.600376300169827, "learning_rate": 1.8912823187718548e-06, "loss": 0.0507, "step": 7936 }, { "epoch": 2.17, "grad_norm": 1.6508937238407522, "learning_rate": 1.8901282362430424e-06, "loss": 0.06, "step": 7937 }, { "epoch": 2.17, "grad_norm": 1.4163073160036752, "learning_rate": 1.8889744238669216e-06, "loss": 0.0385, "step": 7938 }, { "epoch": 2.17, "grad_norm": 1.3955973018967271, "learning_rate": 1.8878208817437216e-06, "loss": 0.0412, "step": 7939 }, { "epoch": 2.17, "grad_norm": 1.4066356353652896, "learning_rate": 1.8866676099736536e-06, "loss": 0.0457, "step": 7940 }, { "epoch": 2.17, "grad_norm": 1.637933505667924, "learning_rate": 1.8855146086568982e-06, "loss": 0.062, "step": 7941 }, { "epoch": 2.17, "grad_norm": 1.7075314762120908, "learning_rate": 1.8843618778936195e-06, "loss": 0.0583, "step": 7942 }, { "epoch": 2.17, "grad_norm": 1.58256969676848, "learning_rate": 1.883209417783952e-06, "loss": 0.0576, "step": 7943 }, { "epoch": 2.17, "grad_norm": 1.5541818941354404, "learning_rate": 1.8820572284280102e-06, "loss": 0.0509, "step": 7944 }, { "epoch": 2.17, "grad_norm": 1.4799399075083148, "learning_rate": 1.8809053099258817e-06, "loss": 0.0431, "step": 7945 }, { "epoch": 2.17, "grad_norm": 1.4743103841131193, "learning_rate": 1.879753662377637e-06, "loss": 0.0578, "step": 7946 }, { "epoch": 2.17, "grad_norm": 1.4398616584650685, "learning_rate": 1.8786022858833148e-06, "loss": 0.0477, "step": 7947 }, { "epoch": 2.17, "grad_norm": 1.3442030241696408, "learning_rate": 1.8774511805429385e-06, "loss": 0.043, "step": 7948 }, { "epoch": 2.17, "grad_norm": 1.4179464346123711, "learning_rate": 1.8763003464565022e-06, "loss": 0.0387, "step": 7949 }, { "epoch": 2.17, "grad_norm": 1.5208001600803147, "learning_rate": 1.875149783723978e-06, "loss": 0.0416, "step": 7950 }, { "epoch": 2.17, "grad_norm": 1.8263714613786792, "learning_rate": 1.873999492445313e-06, "loss": 0.0669, "step": 7951 }, { "epoch": 2.17, "grad_norm": 1.7793321224033813, "learning_rate": 1.8728494727204354e-06, "loss": 0.0461, "step": 7952 }, { "epoch": 2.17, "grad_norm": 1.8735140163478394, "learning_rate": 1.871699724649244e-06, "loss": 0.0607, "step": 7953 }, { "epoch": 2.17, "grad_norm": 1.3845715251761794, "learning_rate": 1.8705502483316196e-06, "loss": 0.0397, "step": 7954 }, { "epoch": 2.17, "grad_norm": 1.4686588841829997, "learning_rate": 1.8694010438674144e-06, "loss": 0.0507, "step": 7955 }, { "epoch": 2.17, "grad_norm": 1.5587442667979023, "learning_rate": 1.86825211135646e-06, "loss": 0.0516, "step": 7956 }, { "epoch": 2.17, "grad_norm": 1.4229870764587589, "learning_rate": 1.8671034508985615e-06, "loss": 0.0485, "step": 7957 }, { "epoch": 2.17, "grad_norm": 1.3384009962772436, "learning_rate": 1.8659550625935052e-06, "loss": 0.0434, "step": 7958 }, { "epoch": 2.17, "grad_norm": 1.8612363626812651, "learning_rate": 1.8648069465410483e-06, "loss": 0.0579, "step": 7959 }, { "epoch": 2.17, "grad_norm": 1.8425298960201364, "learning_rate": 1.8636591028409302e-06, "loss": 0.0599, "step": 7960 }, { "epoch": 2.17, "grad_norm": 1.3945461370950045, "learning_rate": 1.862511531592861e-06, "loss": 0.038, "step": 7961 }, { "epoch": 2.17, "grad_norm": 1.3389043576546187, "learning_rate": 1.8613642328965303e-06, "loss": 0.0414, "step": 7962 }, { "epoch": 2.17, "grad_norm": 1.1975136829967095, "learning_rate": 1.8602172068516011e-06, "loss": 0.0338, "step": 7963 }, { "epoch": 2.17, "grad_norm": 1.378548529035634, "learning_rate": 1.8590704535577187e-06, "loss": 0.0449, "step": 7964 }, { "epoch": 2.17, "grad_norm": 1.5011630520910686, "learning_rate": 1.8579239731144971e-06, "loss": 0.0545, "step": 7965 }, { "epoch": 2.17, "grad_norm": 1.531846314230187, "learning_rate": 1.8567777656215336e-06, "loss": 0.0509, "step": 7966 }, { "epoch": 2.17, "grad_norm": 1.4693219926895753, "learning_rate": 1.8556318311783977e-06, "loss": 0.0419, "step": 7967 }, { "epoch": 2.18, "grad_norm": 1.2588706748102763, "learning_rate": 1.854486169884635e-06, "loss": 0.0388, "step": 7968 }, { "epoch": 2.18, "grad_norm": 1.4164366033679199, "learning_rate": 1.853340781839767e-06, "loss": 0.0439, "step": 7969 }, { "epoch": 2.18, "grad_norm": 1.4050453327497048, "learning_rate": 1.8521956671432967e-06, "loss": 0.0356, "step": 7970 }, { "epoch": 2.18, "grad_norm": 1.8175427152650618, "learning_rate": 1.8510508258946957e-06, "loss": 0.0513, "step": 7971 }, { "epoch": 2.18, "grad_norm": 1.3251804820540158, "learning_rate": 1.8499062581934197e-06, "loss": 0.0462, "step": 7972 }, { "epoch": 2.18, "grad_norm": 1.6045768540629455, "learning_rate": 1.8487619641388938e-06, "loss": 0.0458, "step": 7973 }, { "epoch": 2.18, "grad_norm": 1.8535817236846683, "learning_rate": 1.847617943830523e-06, "loss": 0.0577, "step": 7974 }, { "epoch": 2.18, "grad_norm": 1.481519681921305, "learning_rate": 1.846474197367686e-06, "loss": 0.0448, "step": 7975 }, { "epoch": 2.18, "grad_norm": 1.3772340241703847, "learning_rate": 1.845330724849742e-06, "loss": 0.0408, "step": 7976 }, { "epoch": 2.18, "grad_norm": 1.3416299516981818, "learning_rate": 1.8441875263760211e-06, "loss": 0.0421, "step": 7977 }, { "epoch": 2.18, "grad_norm": 1.49517404288257, "learning_rate": 1.8430446020458353e-06, "loss": 0.049, "step": 7978 }, { "epoch": 2.18, "grad_norm": 1.8490889058601157, "learning_rate": 1.8419019519584685e-06, "loss": 0.0562, "step": 7979 }, { "epoch": 2.18, "grad_norm": 1.5075018156490743, "learning_rate": 1.8407595762131814e-06, "loss": 0.0489, "step": 7980 }, { "epoch": 2.18, "grad_norm": 1.6817974447433879, "learning_rate": 1.8396174749092105e-06, "loss": 0.0546, "step": 7981 }, { "epoch": 2.18, "grad_norm": 1.4796280241751474, "learning_rate": 1.8384756481457723e-06, "loss": 0.0488, "step": 7982 }, { "epoch": 2.18, "grad_norm": 1.5436936192131094, "learning_rate": 1.8373340960220531e-06, "loss": 0.0489, "step": 7983 }, { "epoch": 2.18, "grad_norm": 1.36442829898563, "learning_rate": 1.836192818637223e-06, "loss": 0.0426, "step": 7984 }, { "epoch": 2.18, "grad_norm": 1.2747600440766713, "learning_rate": 1.8350518160904213e-06, "loss": 0.0409, "step": 7985 }, { "epoch": 2.18, "grad_norm": 1.6958963639081952, "learning_rate": 1.8339110884807671e-06, "loss": 0.0529, "step": 7986 }, { "epoch": 2.18, "grad_norm": 1.6459501166549269, "learning_rate": 1.8327706359073526e-06, "loss": 0.051, "step": 7987 }, { "epoch": 2.18, "grad_norm": 1.332145295643568, "learning_rate": 1.8316304584692517e-06, "loss": 0.0398, "step": 7988 }, { "epoch": 2.18, "grad_norm": 1.6092169476961729, "learning_rate": 1.830490556265508e-06, "loss": 0.0409, "step": 7989 }, { "epoch": 2.18, "grad_norm": 1.4238509726761988, "learning_rate": 1.8293509293951468e-06, "loss": 0.0407, "step": 7990 }, { "epoch": 2.18, "grad_norm": 1.7838877695225555, "learning_rate": 1.8282115779571651e-06, "loss": 0.0478, "step": 7991 }, { "epoch": 2.18, "grad_norm": 1.5751210671199023, "learning_rate": 1.8270725020505387e-06, "loss": 0.0491, "step": 7992 }, { "epoch": 2.18, "grad_norm": 1.4135249575881417, "learning_rate": 1.8259337017742158e-06, "loss": 0.0507, "step": 7993 }, { "epoch": 2.18, "grad_norm": 1.527191930554032, "learning_rate": 1.8247951772271267e-06, "loss": 0.0416, "step": 7994 }, { "epoch": 2.18, "grad_norm": 1.7271732494130831, "learning_rate": 1.8236569285081707e-06, "loss": 0.0494, "step": 7995 }, { "epoch": 2.18, "grad_norm": 1.5132401768208266, "learning_rate": 1.8225189557162315e-06, "loss": 0.047, "step": 7996 }, { "epoch": 2.18, "grad_norm": 1.380208601564516, "learning_rate": 1.8213812589501611e-06, "loss": 0.0416, "step": 7997 }, { "epoch": 2.18, "grad_norm": 1.7298877135663888, "learning_rate": 1.820243838308791e-06, "loss": 0.0464, "step": 7998 }, { "epoch": 2.18, "grad_norm": 1.7337897090316465, "learning_rate": 1.8191066938909263e-06, "loss": 0.0535, "step": 7999 }, { "epoch": 2.18, "grad_norm": 1.5534026481394936, "learning_rate": 1.8179698257953543e-06, "loss": 0.0563, "step": 8000 }, { "epoch": 2.18, "grad_norm": 1.559369432043881, "learning_rate": 1.8168332341208294e-06, "loss": 0.0459, "step": 8001 }, { "epoch": 2.18, "grad_norm": 1.4050147857745554, "learning_rate": 1.8156969189660911e-06, "loss": 0.0389, "step": 8002 }, { "epoch": 2.18, "grad_norm": 1.554806608506223, "learning_rate": 1.8145608804298482e-06, "loss": 0.0483, "step": 8003 }, { "epoch": 2.19, "grad_norm": 1.5440321275716231, "learning_rate": 1.8134251186107875e-06, "loss": 0.0442, "step": 8004 }, { "epoch": 2.19, "grad_norm": 1.7955266330060053, "learning_rate": 1.8122896336075708e-06, "loss": 0.0562, "step": 8005 }, { "epoch": 2.19, "grad_norm": 1.9204853015330836, "learning_rate": 1.8111544255188402e-06, "loss": 0.0512, "step": 8006 }, { "epoch": 2.19, "grad_norm": 1.6436730648768774, "learning_rate": 1.8100194944432064e-06, "loss": 0.0479, "step": 8007 }, { "epoch": 2.19, "grad_norm": 1.6989422880228322, "learning_rate": 1.8088848404792652e-06, "loss": 0.0555, "step": 8008 }, { "epoch": 2.19, "grad_norm": 1.9347168220677684, "learning_rate": 1.80775046372558e-06, "loss": 0.0609, "step": 8009 }, { "epoch": 2.19, "grad_norm": 1.691600062440382, "learning_rate": 1.8066163642806945e-06, "loss": 0.0487, "step": 8010 }, { "epoch": 2.19, "grad_norm": 1.4340196154343636, "learning_rate": 1.8054825422431248e-06, "loss": 0.0444, "step": 8011 }, { "epoch": 2.19, "grad_norm": 1.4120811621061868, "learning_rate": 1.8043489977113688e-06, "loss": 0.044, "step": 8012 }, { "epoch": 2.19, "grad_norm": 1.9203459576589121, "learning_rate": 1.8032157307838943e-06, "loss": 0.0568, "step": 8013 }, { "epoch": 2.19, "grad_norm": 1.799035326438951, "learning_rate": 1.8020827415591496e-06, "loss": 0.0618, "step": 8014 }, { "epoch": 2.19, "grad_norm": 1.4528087066170114, "learning_rate": 1.8009500301355564e-06, "loss": 0.0444, "step": 8015 }, { "epoch": 2.19, "grad_norm": 1.589583956397223, "learning_rate": 1.7998175966115116e-06, "loss": 0.0503, "step": 8016 }, { "epoch": 2.19, "grad_norm": 1.4324950618079342, "learning_rate": 1.7986854410853882e-06, "loss": 0.0435, "step": 8017 }, { "epoch": 2.19, "grad_norm": 1.4062335053653605, "learning_rate": 1.7975535636555387e-06, "loss": 0.0419, "step": 8018 }, { "epoch": 2.19, "grad_norm": 1.7419709273642674, "learning_rate": 1.7964219644202852e-06, "loss": 0.0526, "step": 8019 }, { "epoch": 2.19, "grad_norm": 1.3341372470147885, "learning_rate": 1.7952906434779327e-06, "loss": 0.0369, "step": 8020 }, { "epoch": 2.19, "grad_norm": 1.4258158119073165, "learning_rate": 1.794159600926757e-06, "loss": 0.0438, "step": 8021 }, { "epoch": 2.19, "grad_norm": 1.379169774591084, "learning_rate": 1.79302883686501e-06, "loss": 0.0463, "step": 8022 }, { "epoch": 2.19, "grad_norm": 1.540008647033397, "learning_rate": 1.7918983513909199e-06, "loss": 0.0491, "step": 8023 }, { "epoch": 2.19, "grad_norm": 1.5660911562346658, "learning_rate": 1.790768144602692e-06, "loss": 0.0537, "step": 8024 }, { "epoch": 2.19, "grad_norm": 1.3889209327573362, "learning_rate": 1.7896382165985094e-06, "loss": 0.0448, "step": 8025 }, { "epoch": 2.19, "grad_norm": 1.3516853927826242, "learning_rate": 1.7885085674765263e-06, "loss": 0.0416, "step": 8026 }, { "epoch": 2.19, "grad_norm": 1.3760715658806681, "learning_rate": 1.7873791973348737e-06, "loss": 0.0397, "step": 8027 }, { "epoch": 2.19, "grad_norm": 1.4267088077724834, "learning_rate": 1.7862501062716591e-06, "loss": 0.0414, "step": 8028 }, { "epoch": 2.19, "grad_norm": 1.4855078173588327, "learning_rate": 1.7851212943849682e-06, "loss": 0.051, "step": 8029 }, { "epoch": 2.19, "grad_norm": 1.5198410609474282, "learning_rate": 1.7839927617728569e-06, "loss": 0.0499, "step": 8030 }, { "epoch": 2.19, "grad_norm": 1.6076867577005096, "learning_rate": 1.7828645085333645e-06, "loss": 0.0451, "step": 8031 }, { "epoch": 2.19, "grad_norm": 1.5671940949394, "learning_rate": 1.7817365347644993e-06, "loss": 0.0479, "step": 8032 }, { "epoch": 2.19, "grad_norm": 1.7189734338329012, "learning_rate": 1.7806088405642474e-06, "loss": 0.0502, "step": 8033 }, { "epoch": 2.19, "grad_norm": 1.4450312993810916, "learning_rate": 1.7794814260305699e-06, "loss": 0.0452, "step": 8034 }, { "epoch": 2.19, "grad_norm": 1.3382518455599572, "learning_rate": 1.7783542912614076e-06, "loss": 0.0466, "step": 8035 }, { "epoch": 2.19, "grad_norm": 1.614169787987761, "learning_rate": 1.7772274363546704e-06, "loss": 0.0475, "step": 8036 }, { "epoch": 2.19, "grad_norm": 1.4421716431037035, "learning_rate": 1.7761008614082515e-06, "loss": 0.0471, "step": 8037 }, { "epoch": 2.19, "grad_norm": 2.0413040133287192, "learning_rate": 1.774974566520014e-06, "loss": 0.0522, "step": 8038 }, { "epoch": 2.19, "grad_norm": 1.566075586598455, "learning_rate": 1.773848551787798e-06, "loss": 0.0498, "step": 8039 }, { "epoch": 2.19, "grad_norm": 1.6986095896473912, "learning_rate": 1.7727228173094184e-06, "loss": 0.0456, "step": 8040 }, { "epoch": 2.2, "grad_norm": 1.7825050305559207, "learning_rate": 1.7715973631826705e-06, "loss": 0.052, "step": 8041 }, { "epoch": 2.2, "grad_norm": 1.272601867544646, "learning_rate": 1.7704721895053179e-06, "loss": 0.041, "step": 8042 }, { "epoch": 2.2, "grad_norm": 1.514402068915755, "learning_rate": 1.7693472963751079e-06, "loss": 0.0488, "step": 8043 }, { "epoch": 2.2, "grad_norm": 1.6702403608819327, "learning_rate": 1.768222683889757e-06, "loss": 0.0486, "step": 8044 }, { "epoch": 2.2, "grad_norm": 1.6935623722822029, "learning_rate": 1.7670983521469597e-06, "loss": 0.0547, "step": 8045 }, { "epoch": 2.2, "grad_norm": 1.431338935557627, "learning_rate": 1.7659743012443853e-06, "loss": 0.0438, "step": 8046 }, { "epoch": 2.2, "grad_norm": 1.4563254395396996, "learning_rate": 1.7648505312796814e-06, "loss": 0.0447, "step": 8047 }, { "epoch": 2.2, "grad_norm": 1.3035631970500714, "learning_rate": 1.7637270423504664e-06, "loss": 0.0426, "step": 8048 }, { "epoch": 2.2, "grad_norm": 1.3488227248215918, "learning_rate": 1.7626038345543405e-06, "loss": 0.0434, "step": 8049 }, { "epoch": 2.2, "grad_norm": 1.2698785890593605, "learning_rate": 1.7614809079888744e-06, "loss": 0.0343, "step": 8050 }, { "epoch": 2.2, "grad_norm": 1.5533410238420473, "learning_rate": 1.7603582627516163e-06, "loss": 0.0479, "step": 8051 }, { "epoch": 2.2, "grad_norm": 1.9021485521373942, "learning_rate": 1.7592358989400882e-06, "loss": 0.0538, "step": 8052 }, { "epoch": 2.2, "grad_norm": 1.303909725115401, "learning_rate": 1.7581138166517913e-06, "loss": 0.044, "step": 8053 }, { "epoch": 2.2, "grad_norm": 1.6831880049600092, "learning_rate": 1.7569920159841985e-06, "loss": 0.0575, "step": 8054 }, { "epoch": 2.2, "grad_norm": 1.7407240845738021, "learning_rate": 1.7558704970347622e-06, "loss": 0.0516, "step": 8055 }, { "epoch": 2.2, "grad_norm": 1.3835252675587282, "learning_rate": 1.7547492599009063e-06, "loss": 0.0426, "step": 8056 }, { "epoch": 2.2, "grad_norm": 1.4277012234796869, "learning_rate": 1.7536283046800328e-06, "loss": 0.0505, "step": 8057 }, { "epoch": 2.2, "grad_norm": 1.4210503861735482, "learning_rate": 1.7525076314695167e-06, "loss": 0.0374, "step": 8058 }, { "epoch": 2.2, "grad_norm": 1.8054495669733548, "learning_rate": 1.7513872403667125e-06, "loss": 0.0528, "step": 8059 }, { "epoch": 2.2, "grad_norm": 1.3772449148379393, "learning_rate": 1.7502671314689457e-06, "loss": 0.046, "step": 8060 }, { "epoch": 2.2, "grad_norm": 1.6692236331777652, "learning_rate": 1.749147304873522e-06, "loss": 0.0518, "step": 8061 }, { "epoch": 2.2, "grad_norm": 1.412111684924484, "learning_rate": 1.748027760677719e-06, "loss": 0.0457, "step": 8062 }, { "epoch": 2.2, "grad_norm": 1.718977405971166, "learning_rate": 1.746908498978791e-06, "loss": 0.0597, "step": 8063 }, { "epoch": 2.2, "grad_norm": 1.7404234800898917, "learning_rate": 1.7457895198739649e-06, "loss": 0.0512, "step": 8064 }, { "epoch": 2.2, "grad_norm": 1.5089479098161227, "learning_rate": 1.7446708234604498e-06, "loss": 0.0537, "step": 8065 }, { "epoch": 2.2, "grad_norm": 1.7481606711683564, "learning_rate": 1.7435524098354228e-06, "loss": 0.0634, "step": 8066 }, { "epoch": 2.2, "grad_norm": 1.363526497440536, "learning_rate": 1.7424342790960436e-06, "loss": 0.0402, "step": 8067 }, { "epoch": 2.2, "grad_norm": 1.7307837232743941, "learning_rate": 1.741316431339441e-06, "loss": 0.0544, "step": 8068 }, { "epoch": 2.2, "grad_norm": 1.714394427871754, "learning_rate": 1.7401988666627217e-06, "loss": 0.0507, "step": 8069 }, { "epoch": 2.2, "grad_norm": 1.5831542380686199, "learning_rate": 1.7390815851629672e-06, "loss": 0.056, "step": 8070 }, { "epoch": 2.2, "grad_norm": 1.41649346975632, "learning_rate": 1.737964586937238e-06, "loss": 0.048, "step": 8071 }, { "epoch": 2.2, "grad_norm": 1.3478573344921505, "learning_rate": 1.7368478720825633e-06, "loss": 0.0461, "step": 8072 }, { "epoch": 2.2, "grad_norm": 1.3444617830925452, "learning_rate": 1.7357314406959552e-06, "loss": 0.0435, "step": 8073 }, { "epoch": 2.2, "grad_norm": 1.3615280247243722, "learning_rate": 1.7346152928743958e-06, "loss": 0.0531, "step": 8074 }, { "epoch": 2.2, "grad_norm": 1.4176401270277832, "learning_rate": 1.7334994287148438e-06, "loss": 0.0454, "step": 8075 }, { "epoch": 2.2, "grad_norm": 1.542296395951026, "learning_rate": 1.7323838483142318e-06, "loss": 0.0509, "step": 8076 }, { "epoch": 2.21, "grad_norm": 1.3237752219958212, "learning_rate": 1.7312685517694737e-06, "loss": 0.0415, "step": 8077 }, { "epoch": 2.21, "grad_norm": 1.611921170291769, "learning_rate": 1.7301535391774516e-06, "loss": 0.0549, "step": 8078 }, { "epoch": 2.21, "grad_norm": 1.575347973006268, "learning_rate": 1.7290388106350276e-06, "loss": 0.0522, "step": 8079 }, { "epoch": 2.21, "grad_norm": 1.45970055885112, "learning_rate": 1.7279243662390377e-06, "loss": 0.0446, "step": 8080 }, { "epoch": 2.21, "grad_norm": 1.7104706386011317, "learning_rate": 1.7268102060862918e-06, "loss": 0.0534, "step": 8081 }, { "epoch": 2.21, "grad_norm": 1.431100505675895, "learning_rate": 1.7256963302735752e-06, "loss": 0.0405, "step": 8082 }, { "epoch": 2.21, "grad_norm": 1.3900376877398863, "learning_rate": 1.7245827388976527e-06, "loss": 0.0433, "step": 8083 }, { "epoch": 2.21, "grad_norm": 1.2620913341087951, "learning_rate": 1.723469432055258e-06, "loss": 0.0452, "step": 8084 }, { "epoch": 2.21, "grad_norm": 1.7308685436396103, "learning_rate": 1.7223564098431067e-06, "loss": 0.0501, "step": 8085 }, { "epoch": 2.21, "grad_norm": 1.6840377202226817, "learning_rate": 1.7212436723578851e-06, "loss": 0.0516, "step": 8086 }, { "epoch": 2.21, "grad_norm": 1.4636601149365835, "learning_rate": 1.7201312196962561e-06, "loss": 0.0487, "step": 8087 }, { "epoch": 2.21, "grad_norm": 1.6426251357750212, "learning_rate": 1.7190190519548555e-06, "loss": 0.0454, "step": 8088 }, { "epoch": 2.21, "grad_norm": 1.7792510714521894, "learning_rate": 1.7179071692303002e-06, "loss": 0.0615, "step": 8089 }, { "epoch": 2.21, "grad_norm": 1.6803777796043524, "learning_rate": 1.7167955716191753e-06, "loss": 0.051, "step": 8090 }, { "epoch": 2.21, "grad_norm": 1.4625246032667736, "learning_rate": 1.7156842592180484e-06, "loss": 0.049, "step": 8091 }, { "epoch": 2.21, "grad_norm": 1.2250366791458176, "learning_rate": 1.7145732321234565e-06, "loss": 0.0379, "step": 8092 }, { "epoch": 2.21, "grad_norm": 1.6438603753343128, "learning_rate": 1.7134624904319142e-06, "loss": 0.0436, "step": 8093 }, { "epoch": 2.21, "grad_norm": 1.6209682337135403, "learning_rate": 1.7123520342399091e-06, "loss": 0.0499, "step": 8094 }, { "epoch": 2.21, "grad_norm": 1.9108591718720465, "learning_rate": 1.7112418636439093e-06, "loss": 0.0575, "step": 8095 }, { "epoch": 2.21, "grad_norm": 1.9517829343892141, "learning_rate": 1.710131978740351e-06, "loss": 0.0627, "step": 8096 }, { "epoch": 2.21, "grad_norm": 1.6434603121572873, "learning_rate": 1.7090223796256527e-06, "loss": 0.0461, "step": 8097 }, { "epoch": 2.21, "grad_norm": 1.343880307664543, "learning_rate": 1.7079130663962034e-06, "loss": 0.038, "step": 8098 }, { "epoch": 2.21, "grad_norm": 1.5921481396759396, "learning_rate": 1.7068040391483676e-06, "loss": 0.0482, "step": 8099 }, { "epoch": 2.21, "grad_norm": 1.625854252274159, "learning_rate": 1.7056952979784853e-06, "loss": 0.0502, "step": 8100 }, { "epoch": 2.21, "grad_norm": 1.3860285372928698, "learning_rate": 1.7045868429828745e-06, "loss": 0.0427, "step": 8101 }, { "epoch": 2.21, "grad_norm": 1.2755142136046578, "learning_rate": 1.703478674257823e-06, "loss": 0.0394, "step": 8102 }, { "epoch": 2.21, "grad_norm": 1.5144339967954465, "learning_rate": 1.7023707918995996e-06, "loss": 0.0473, "step": 8103 }, { "epoch": 2.21, "grad_norm": 1.4380683276855128, "learning_rate": 1.701263196004445e-06, "loss": 0.0508, "step": 8104 }, { "epoch": 2.21, "grad_norm": 1.5011324477205539, "learning_rate": 1.7001558866685747e-06, "loss": 0.0488, "step": 8105 }, { "epoch": 2.21, "grad_norm": 1.715964400843852, "learning_rate": 1.699048863988178e-06, "loss": 0.048, "step": 8106 }, { "epoch": 2.21, "grad_norm": 1.5905625264814254, "learning_rate": 1.6979421280594249e-06, "loss": 0.0471, "step": 8107 }, { "epoch": 2.21, "grad_norm": 1.3480203972939815, "learning_rate": 1.6968356789784535e-06, "loss": 0.0437, "step": 8108 }, { "epoch": 2.21, "grad_norm": 1.373952464173078, "learning_rate": 1.695729516841384e-06, "loss": 0.042, "step": 8109 }, { "epoch": 2.21, "grad_norm": 1.3873495148985726, "learning_rate": 1.6946236417443062e-06, "loss": 0.0478, "step": 8110 }, { "epoch": 2.21, "grad_norm": 1.3800542523265455, "learning_rate": 1.6935180537832862e-06, "loss": 0.0472, "step": 8111 }, { "epoch": 2.21, "grad_norm": 1.455749077453802, "learning_rate": 1.692412753054365e-06, "loss": 0.0459, "step": 8112 }, { "epoch": 2.21, "grad_norm": 1.3448691339260663, "learning_rate": 1.6913077396535626e-06, "loss": 0.0439, "step": 8113 }, { "epoch": 2.22, "grad_norm": 1.4869596539761307, "learning_rate": 1.6902030136768665e-06, "loss": 0.0444, "step": 8114 }, { "epoch": 2.22, "grad_norm": 1.4002155329355563, "learning_rate": 1.6890985752202488e-06, "loss": 0.0395, "step": 8115 }, { "epoch": 2.22, "grad_norm": 1.4232349895983019, "learning_rate": 1.6879944243796477e-06, "loss": 0.0484, "step": 8116 }, { "epoch": 2.22, "grad_norm": 1.4808223520786983, "learning_rate": 1.68689056125098e-06, "loss": 0.0452, "step": 8117 }, { "epoch": 2.22, "grad_norm": 1.7180578842373968, "learning_rate": 1.6857869859301401e-06, "loss": 0.0584, "step": 8118 }, { "epoch": 2.22, "grad_norm": 1.4331455416117003, "learning_rate": 1.6846836985129916e-06, "loss": 0.051, "step": 8119 }, { "epoch": 2.22, "grad_norm": 1.6463101528931443, "learning_rate": 1.6835806990953802e-06, "loss": 0.0521, "step": 8120 }, { "epoch": 2.22, "grad_norm": 1.5398514608393399, "learning_rate": 1.6824779877731211e-06, "loss": 0.052, "step": 8121 }, { "epoch": 2.22, "grad_norm": 1.429756491684159, "learning_rate": 1.681375564642006e-06, "loss": 0.0429, "step": 8122 }, { "epoch": 2.22, "grad_norm": 1.3680878711697797, "learning_rate": 1.6802734297977997e-06, "loss": 0.0417, "step": 8123 }, { "epoch": 2.22, "grad_norm": 1.3931442380185792, "learning_rate": 1.6791715833362482e-06, "loss": 0.0471, "step": 8124 }, { "epoch": 2.22, "grad_norm": 1.649261543585834, "learning_rate": 1.6780700253530642e-06, "loss": 0.0539, "step": 8125 }, { "epoch": 2.22, "grad_norm": 1.4619022813850429, "learning_rate": 1.6769687559439425e-06, "loss": 0.0486, "step": 8126 }, { "epoch": 2.22, "grad_norm": 1.7863664521790024, "learning_rate": 1.6758677752045487e-06, "loss": 0.0535, "step": 8127 }, { "epoch": 2.22, "grad_norm": 1.7445993245602909, "learning_rate": 1.674767083230524e-06, "loss": 0.0639, "step": 8128 }, { "epoch": 2.22, "grad_norm": 1.5023427726537997, "learning_rate": 1.673666680117484e-06, "loss": 0.0489, "step": 8129 }, { "epoch": 2.22, "grad_norm": 1.513167149987485, "learning_rate": 1.6725665659610218e-06, "loss": 0.0497, "step": 8130 }, { "epoch": 2.22, "grad_norm": 1.621633800008838, "learning_rate": 1.6714667408567015e-06, "loss": 0.0494, "step": 8131 }, { "epoch": 2.22, "grad_norm": 1.8745827472701766, "learning_rate": 1.6703672049000673e-06, "loss": 0.0662, "step": 8132 }, { "epoch": 2.22, "grad_norm": 1.6706845039157907, "learning_rate": 1.6692679581866334e-06, "loss": 0.059, "step": 8133 }, { "epoch": 2.22, "grad_norm": 1.3533243399441997, "learning_rate": 1.6681690008118912e-06, "loss": 0.0427, "step": 8134 }, { "epoch": 2.22, "grad_norm": 1.3262834352381931, "learning_rate": 1.6670703328713039e-06, "loss": 0.0389, "step": 8135 }, { "epoch": 2.22, "grad_norm": 1.6236684887114572, "learning_rate": 1.665971954460316e-06, "loss": 0.0567, "step": 8136 }, { "epoch": 2.22, "grad_norm": 1.4524135917482524, "learning_rate": 1.6648738656743402e-06, "loss": 0.0477, "step": 8137 }, { "epoch": 2.22, "grad_norm": 1.6256087440277407, "learning_rate": 1.6637760666087688e-06, "loss": 0.0548, "step": 8138 }, { "epoch": 2.22, "grad_norm": 1.5138650294760831, "learning_rate": 1.6626785573589667e-06, "loss": 0.0543, "step": 8139 }, { "epoch": 2.22, "grad_norm": 1.4041457464715152, "learning_rate": 1.6615813380202728e-06, "loss": 0.0415, "step": 8140 }, { "epoch": 2.22, "grad_norm": 1.6309638677545721, "learning_rate": 1.6604844086880012e-06, "loss": 0.052, "step": 8141 }, { "epoch": 2.22, "grad_norm": 1.3477925797770858, "learning_rate": 1.6593877694574435e-06, "loss": 0.0475, "step": 8142 }, { "epoch": 2.22, "grad_norm": 1.2577528534930584, "learning_rate": 1.6582914204238621e-06, "loss": 0.04, "step": 8143 }, { "epoch": 2.22, "grad_norm": 1.7834576210377315, "learning_rate": 1.6571953616824987e-06, "loss": 0.0532, "step": 8144 }, { "epoch": 2.22, "grad_norm": 1.4863588895234348, "learning_rate": 1.6560995933285656e-06, "loss": 0.0531, "step": 8145 }, { "epoch": 2.22, "grad_norm": 1.5233881321112273, "learning_rate": 1.6550041154572521e-06, "loss": 0.0517, "step": 8146 }, { "epoch": 2.22, "grad_norm": 1.4501349439502198, "learning_rate": 1.65390892816372e-06, "loss": 0.0453, "step": 8147 }, { "epoch": 2.22, "grad_norm": 1.4360532250591533, "learning_rate": 1.6528140315431102e-06, "loss": 0.0415, "step": 8148 }, { "epoch": 2.22, "grad_norm": 1.7072872659935638, "learning_rate": 1.6517194256905329e-06, "loss": 0.0482, "step": 8149 }, { "epoch": 2.22, "grad_norm": 1.2083911373634373, "learning_rate": 1.650625110701079e-06, "loss": 0.0357, "step": 8150 }, { "epoch": 2.23, "grad_norm": 1.6809449758243455, "learning_rate": 1.6495310866698095e-06, "loss": 0.0492, "step": 8151 }, { "epoch": 2.23, "grad_norm": 1.5753231230797256, "learning_rate": 1.6484373536917615e-06, "loss": 0.043, "step": 8152 }, { "epoch": 2.23, "grad_norm": 1.5035187177744316, "learning_rate": 1.647343911861945e-06, "loss": 0.0513, "step": 8153 }, { "epoch": 2.23, "grad_norm": 1.4157189322156665, "learning_rate": 1.6462507612753503e-06, "loss": 0.0513, "step": 8154 }, { "epoch": 2.23, "grad_norm": 1.6742848983734384, "learning_rate": 1.6451579020269353e-06, "loss": 0.0455, "step": 8155 }, { "epoch": 2.23, "grad_norm": 1.6456541930501847, "learning_rate": 1.6440653342116398e-06, "loss": 0.0427, "step": 8156 }, { "epoch": 2.23, "grad_norm": 1.4984074307545814, "learning_rate": 1.642973057924372e-06, "loss": 0.0507, "step": 8157 }, { "epoch": 2.23, "grad_norm": 1.4772195637859555, "learning_rate": 1.6418810732600177e-06, "loss": 0.0515, "step": 8158 }, { "epoch": 2.23, "grad_norm": 1.7196871922385537, "learning_rate": 1.6407893803134357e-06, "loss": 0.0567, "step": 8159 }, { "epoch": 2.23, "grad_norm": 1.48391662020529, "learning_rate": 1.6396979791794631e-06, "loss": 0.054, "step": 8160 }, { "epoch": 2.23, "grad_norm": 1.3107200014290368, "learning_rate": 1.6386068699529067e-06, "loss": 0.0434, "step": 8161 }, { "epoch": 2.23, "grad_norm": 1.4567064986577705, "learning_rate": 1.6375160527285538e-06, "loss": 0.0418, "step": 8162 }, { "epoch": 2.23, "grad_norm": 1.268562273018905, "learning_rate": 1.636425527601161e-06, "loss": 0.0395, "step": 8163 }, { "epoch": 2.23, "grad_norm": 1.4922041447965735, "learning_rate": 1.635335294665462e-06, "loss": 0.0436, "step": 8164 }, { "epoch": 2.23, "grad_norm": 1.0988693132261922, "learning_rate": 1.6342453540161624e-06, "loss": 0.0351, "step": 8165 }, { "epoch": 2.23, "grad_norm": 1.6733420531633307, "learning_rate": 1.6331557057479485e-06, "loss": 0.0567, "step": 8166 }, { "epoch": 2.23, "grad_norm": 1.3788757259888342, "learning_rate": 1.632066349955474e-06, "loss": 0.049, "step": 8167 }, { "epoch": 2.23, "grad_norm": 1.4908065032829123, "learning_rate": 1.630977286733374e-06, "loss": 0.0389, "step": 8168 }, { "epoch": 2.23, "grad_norm": 1.6533841740056816, "learning_rate": 1.6298885161762528e-06, "loss": 0.0574, "step": 8169 }, { "epoch": 2.23, "grad_norm": 1.5621180884770862, "learning_rate": 1.6288000383786912e-06, "loss": 0.0465, "step": 8170 }, { "epoch": 2.23, "grad_norm": 1.502062037079539, "learning_rate": 1.6277118534352432e-06, "loss": 0.0448, "step": 8171 }, { "epoch": 2.23, "grad_norm": 1.5945718235288897, "learning_rate": 1.6266239614404421e-06, "loss": 0.0588, "step": 8172 }, { "epoch": 2.23, "grad_norm": 1.6403079569615602, "learning_rate": 1.6255363624887894e-06, "loss": 0.0509, "step": 8173 }, { "epoch": 2.23, "grad_norm": 1.7523551739406498, "learning_rate": 1.6244490566747667e-06, "loss": 0.0559, "step": 8174 }, { "epoch": 2.23, "grad_norm": 1.2695197993651435, "learning_rate": 1.6233620440928265e-06, "loss": 0.0368, "step": 8175 }, { "epoch": 2.23, "grad_norm": 1.520931362145433, "learning_rate": 1.6222753248373969e-06, "loss": 0.0467, "step": 8176 }, { "epoch": 2.23, "grad_norm": 1.6231449694365343, "learning_rate": 1.6211888990028785e-06, "loss": 0.0583, "step": 8177 }, { "epoch": 2.23, "grad_norm": 1.3378397772371289, "learning_rate": 1.6201027666836522e-06, "loss": 0.0451, "step": 8178 }, { "epoch": 2.23, "grad_norm": 1.392382853265308, "learning_rate": 1.6190169279740665e-06, "loss": 0.0448, "step": 8179 }, { "epoch": 2.23, "grad_norm": 1.4290789217294706, "learning_rate": 1.6179313829684506e-06, "loss": 0.044, "step": 8180 }, { "epoch": 2.23, "grad_norm": 1.503374201286744, "learning_rate": 1.6168461317611028e-06, "loss": 0.0427, "step": 8181 }, { "epoch": 2.23, "grad_norm": 1.5968457455456138, "learning_rate": 1.6157611744462998e-06, "loss": 0.0497, "step": 8182 }, { "epoch": 2.23, "grad_norm": 1.371448316537398, "learning_rate": 1.6146765111182877e-06, "loss": 0.0445, "step": 8183 }, { "epoch": 2.23, "grad_norm": 1.1858457832977316, "learning_rate": 1.6135921418712959e-06, "loss": 0.0383, "step": 8184 }, { "epoch": 2.23, "grad_norm": 1.6058401758467902, "learning_rate": 1.6125080667995174e-06, "loss": 0.0499, "step": 8185 }, { "epoch": 2.23, "grad_norm": 1.5202149913847696, "learning_rate": 1.6114242859971302e-06, "loss": 0.0458, "step": 8186 }, { "epoch": 2.24, "grad_norm": 1.4769161337097285, "learning_rate": 1.6103407995582794e-06, "loss": 0.0443, "step": 8187 }, { "epoch": 2.24, "grad_norm": 1.6533571102798723, "learning_rate": 1.6092576075770861e-06, "loss": 0.0522, "step": 8188 }, { "epoch": 2.24, "grad_norm": 1.5422006182545938, "learning_rate": 1.6081747101476464e-06, "loss": 0.0545, "step": 8189 }, { "epoch": 2.24, "grad_norm": 1.5070512459258634, "learning_rate": 1.6070921073640328e-06, "loss": 0.0515, "step": 8190 }, { "epoch": 2.24, "grad_norm": 1.4620442310669806, "learning_rate": 1.6060097993202878e-06, "loss": 0.0497, "step": 8191 }, { "epoch": 2.24, "grad_norm": 1.489696731931627, "learning_rate": 1.6049277861104345e-06, "loss": 0.0567, "step": 8192 }, { "epoch": 2.24, "grad_norm": 1.612398889856248, "learning_rate": 1.6038460678284644e-06, "loss": 0.0491, "step": 8193 }, { "epoch": 2.24, "grad_norm": 1.9378443665037954, "learning_rate": 1.602764644568346e-06, "loss": 0.0542, "step": 8194 }, { "epoch": 2.24, "grad_norm": 1.6721160058414224, "learning_rate": 1.6016835164240196e-06, "loss": 0.05, "step": 8195 }, { "epoch": 2.24, "grad_norm": 1.6975156677644365, "learning_rate": 1.6006026834894068e-06, "loss": 0.0518, "step": 8196 }, { "epoch": 2.24, "grad_norm": 1.4458671892588826, "learning_rate": 1.5995221458583943e-06, "loss": 0.0489, "step": 8197 }, { "epoch": 2.24, "grad_norm": 1.6811829650201813, "learning_rate": 1.5984419036248516e-06, "loss": 0.0518, "step": 8198 }, { "epoch": 2.24, "grad_norm": 1.449515878495356, "learning_rate": 1.5973619568826177e-06, "loss": 0.0416, "step": 8199 }, { "epoch": 2.24, "grad_norm": 1.3467642431847793, "learning_rate": 1.5962823057255055e-06, "loss": 0.0426, "step": 8200 }, { "epoch": 2.24, "grad_norm": 1.6081957757636973, "learning_rate": 1.5952029502473032e-06, "loss": 0.0463, "step": 8201 }, { "epoch": 2.24, "grad_norm": 1.4086829315300893, "learning_rate": 1.594123890541776e-06, "loss": 0.0412, "step": 8202 }, { "epoch": 2.24, "grad_norm": 1.6199228277316036, "learning_rate": 1.5930451267026592e-06, "loss": 0.0446, "step": 8203 }, { "epoch": 2.24, "grad_norm": 1.6248291539419761, "learning_rate": 1.5919666588236666e-06, "loss": 0.0546, "step": 8204 }, { "epoch": 2.24, "grad_norm": 1.4842107686924249, "learning_rate": 1.5908884869984831e-06, "loss": 0.0484, "step": 8205 }, { "epoch": 2.24, "grad_norm": 1.671322613966543, "learning_rate": 1.5898106113207685e-06, "loss": 0.0397, "step": 8206 }, { "epoch": 2.24, "grad_norm": 1.4622711130175166, "learning_rate": 1.5887330318841548e-06, "loss": 0.0408, "step": 8207 }, { "epoch": 2.24, "grad_norm": 1.6184641750276456, "learning_rate": 1.5876557487822553e-06, "loss": 0.0551, "step": 8208 }, { "epoch": 2.24, "grad_norm": 1.1953682265800787, "learning_rate": 1.5865787621086491e-06, "loss": 0.0394, "step": 8209 }, { "epoch": 2.24, "grad_norm": 1.4500851584533982, "learning_rate": 1.585502071956897e-06, "loss": 0.0462, "step": 8210 }, { "epoch": 2.24, "grad_norm": 1.5601892453149564, "learning_rate": 1.5844256784205275e-06, "loss": 0.0564, "step": 8211 }, { "epoch": 2.24, "grad_norm": 1.5545377472074746, "learning_rate": 1.583349581593046e-06, "loss": 0.0536, "step": 8212 }, { "epoch": 2.24, "grad_norm": 1.373363747993195, "learning_rate": 1.5822737815679357e-06, "loss": 0.0393, "step": 8213 }, { "epoch": 2.24, "grad_norm": 1.3016026986598959, "learning_rate": 1.5811982784386465e-06, "loss": 0.0398, "step": 8214 }, { "epoch": 2.24, "grad_norm": 1.585756995463118, "learning_rate": 1.5801230722986104e-06, "loss": 0.0465, "step": 8215 }, { "epoch": 2.24, "grad_norm": 1.2958485857896926, "learning_rate": 1.5790481632412286e-06, "loss": 0.0424, "step": 8216 }, { "epoch": 2.24, "grad_norm": 1.4837685635433642, "learning_rate": 1.577973551359877e-06, "loss": 0.0499, "step": 8217 }, { "epoch": 2.24, "grad_norm": 1.662840034025026, "learning_rate": 1.5768992367479058e-06, "loss": 0.0581, "step": 8218 }, { "epoch": 2.24, "grad_norm": 1.3661403932657985, "learning_rate": 1.575825219498643e-06, "loss": 0.039, "step": 8219 }, { "epoch": 2.24, "grad_norm": 1.9670123342118937, "learning_rate": 1.5747514997053841e-06, "loss": 0.0575, "step": 8220 }, { "epoch": 2.24, "grad_norm": 1.3005139170873676, "learning_rate": 1.5736780774614064e-06, "loss": 0.0407, "step": 8221 }, { "epoch": 2.24, "grad_norm": 1.6971504591094217, "learning_rate": 1.5726049528599552e-06, "loss": 0.0563, "step": 8222 }, { "epoch": 2.24, "grad_norm": 1.4422175737273988, "learning_rate": 1.5715321259942529e-06, "loss": 0.0424, "step": 8223 }, { "epoch": 2.25, "grad_norm": 1.397704009442535, "learning_rate": 1.5704595969574933e-06, "loss": 0.0434, "step": 8224 }, { "epoch": 2.25, "grad_norm": 1.7463487895342504, "learning_rate": 1.5693873658428494e-06, "loss": 0.0466, "step": 8225 }, { "epoch": 2.25, "grad_norm": 1.7809533676729927, "learning_rate": 1.568315432743462e-06, "loss": 0.0479, "step": 8226 }, { "epoch": 2.25, "grad_norm": 1.4753121900827932, "learning_rate": 1.567243797752453e-06, "loss": 0.046, "step": 8227 }, { "epoch": 2.25, "grad_norm": 1.827641416334775, "learning_rate": 1.5661724609629132e-06, "loss": 0.0519, "step": 8228 }, { "epoch": 2.25, "grad_norm": 1.4450154964479713, "learning_rate": 1.5651014224679083e-06, "loss": 0.0393, "step": 8229 }, { "epoch": 2.25, "grad_norm": 1.5660727273119208, "learning_rate": 1.5640306823604778e-06, "loss": 0.0518, "step": 8230 }, { "epoch": 2.25, "grad_norm": 1.38358427681166, "learning_rate": 1.5629602407336386e-06, "loss": 0.0453, "step": 8231 }, { "epoch": 2.25, "grad_norm": 1.5796561813012897, "learning_rate": 1.5618900976803769e-06, "loss": 0.0527, "step": 8232 }, { "epoch": 2.25, "grad_norm": 1.5153150126599382, "learning_rate": 1.560820253293659e-06, "loss": 0.0451, "step": 8233 }, { "epoch": 2.25, "grad_norm": 1.285072985482014, "learning_rate": 1.5597507076664187e-06, "loss": 0.0376, "step": 8234 }, { "epoch": 2.25, "grad_norm": 1.500505225742505, "learning_rate": 1.5586814608915673e-06, "loss": 0.0455, "step": 8235 }, { "epoch": 2.25, "grad_norm": 1.3908345798291033, "learning_rate": 1.5576125130619885e-06, "loss": 0.0428, "step": 8236 }, { "epoch": 2.25, "grad_norm": 1.7012003288897117, "learning_rate": 1.5565438642705444e-06, "loss": 0.0579, "step": 8237 }, { "epoch": 2.25, "grad_norm": 1.7326949587359681, "learning_rate": 1.5554755146100641e-06, "loss": 0.0442, "step": 8238 }, { "epoch": 2.25, "grad_norm": 1.6756464438744398, "learning_rate": 1.5544074641733574e-06, "loss": 0.0614, "step": 8239 }, { "epoch": 2.25, "grad_norm": 1.5122949431946533, "learning_rate": 1.5533397130532053e-06, "loss": 0.0462, "step": 8240 }, { "epoch": 2.25, "grad_norm": 1.652057648294175, "learning_rate": 1.5522722613423608e-06, "loss": 0.0499, "step": 8241 }, { "epoch": 2.25, "grad_norm": 1.9126688045031186, "learning_rate": 1.5512051091335518e-06, "loss": 0.0561, "step": 8242 }, { "epoch": 2.25, "grad_norm": 1.3140715771816118, "learning_rate": 1.5501382565194845e-06, "loss": 0.0427, "step": 8243 }, { "epoch": 2.25, "grad_norm": 1.5965372761091547, "learning_rate": 1.5490717035928327e-06, "loss": 0.0499, "step": 8244 }, { "epoch": 2.25, "grad_norm": 1.2378901385564058, "learning_rate": 1.5480054504462505e-06, "loss": 0.0447, "step": 8245 }, { "epoch": 2.25, "grad_norm": 1.4783177861200232, "learning_rate": 1.54693949717236e-06, "loss": 0.0492, "step": 8246 }, { "epoch": 2.25, "grad_norm": 1.734957528542089, "learning_rate": 1.5458738438637616e-06, "loss": 0.0556, "step": 8247 }, { "epoch": 2.25, "grad_norm": 1.3766987454261743, "learning_rate": 1.5448084906130252e-06, "loss": 0.0459, "step": 8248 }, { "epoch": 2.25, "grad_norm": 1.394131207218535, "learning_rate": 1.5437434375127008e-06, "loss": 0.0491, "step": 8249 }, { "epoch": 2.25, "grad_norm": 1.5074360486638545, "learning_rate": 1.542678684655306e-06, "loss": 0.0511, "step": 8250 }, { "epoch": 2.25, "grad_norm": 1.3399965064651054, "learning_rate": 1.5416142321333382e-06, "loss": 0.0389, "step": 8251 }, { "epoch": 2.25, "grad_norm": 1.2249441531394676, "learning_rate": 1.5405500800392643e-06, "loss": 0.0463, "step": 8252 }, { "epoch": 2.25, "grad_norm": 1.4473339032897732, "learning_rate": 1.5394862284655266e-06, "loss": 0.0475, "step": 8253 }, { "epoch": 2.25, "grad_norm": 1.4455559757662613, "learning_rate": 1.5384226775045391e-06, "loss": 0.0537, "step": 8254 }, { "epoch": 2.25, "grad_norm": 1.5672405166444547, "learning_rate": 1.5373594272486958e-06, "loss": 0.0513, "step": 8255 }, { "epoch": 2.25, "grad_norm": 1.5672297331037095, "learning_rate": 1.5362964777903565e-06, "loss": 0.0559, "step": 8256 }, { "epoch": 2.25, "grad_norm": 1.6241397004207176, "learning_rate": 1.5352338292218633e-06, "loss": 0.0546, "step": 8257 }, { "epoch": 2.25, "grad_norm": 1.3955630374610366, "learning_rate": 1.5341714816355257e-06, "loss": 0.0471, "step": 8258 }, { "epoch": 2.25, "grad_norm": 1.1999547123952439, "learning_rate": 1.5331094351236287e-06, "loss": 0.039, "step": 8259 }, { "epoch": 2.25, "grad_norm": 1.342496495178013, "learning_rate": 1.5320476897784309e-06, "loss": 0.0448, "step": 8260 }, { "epoch": 2.26, "grad_norm": 1.4455687678045765, "learning_rate": 1.5309862456921682e-06, "loss": 0.0517, "step": 8261 }, { "epoch": 2.26, "grad_norm": 1.39530499113681, "learning_rate": 1.5299251029570445e-06, "loss": 0.05, "step": 8262 }, { "epoch": 2.26, "grad_norm": 1.3220994274213405, "learning_rate": 1.5288642616652437e-06, "loss": 0.039, "step": 8263 }, { "epoch": 2.26, "grad_norm": 1.4951681318975283, "learning_rate": 1.5278037219089191e-06, "loss": 0.0442, "step": 8264 }, { "epoch": 2.26, "grad_norm": 1.6075466754630183, "learning_rate": 1.5267434837801993e-06, "loss": 0.0538, "step": 8265 }, { "epoch": 2.26, "grad_norm": 1.3379775521327348, "learning_rate": 1.5256835473711844e-06, "loss": 0.0437, "step": 8266 }, { "epoch": 2.26, "grad_norm": 1.4312589224188745, "learning_rate": 1.5246239127739542e-06, "loss": 0.0453, "step": 8267 }, { "epoch": 2.26, "grad_norm": 1.3663636985004097, "learning_rate": 1.523564580080555e-06, "loss": 0.0461, "step": 8268 }, { "epoch": 2.26, "grad_norm": 1.4176524878611945, "learning_rate": 1.5225055493830132e-06, "loss": 0.0425, "step": 8269 }, { "epoch": 2.26, "grad_norm": 1.732516355929053, "learning_rate": 1.5214468207733258e-06, "loss": 0.0504, "step": 8270 }, { "epoch": 2.26, "grad_norm": 1.224737498718848, "learning_rate": 1.5203883943434622e-06, "loss": 0.039, "step": 8271 }, { "epoch": 2.26, "grad_norm": 1.5288818835340634, "learning_rate": 1.5193302701853674e-06, "loss": 0.0476, "step": 8272 }, { "epoch": 2.26, "grad_norm": 1.4935261485706044, "learning_rate": 1.5182724483909618e-06, "loss": 0.0522, "step": 8273 }, { "epoch": 2.26, "grad_norm": 1.5565215827434007, "learning_rate": 1.5172149290521354e-06, "loss": 0.056, "step": 8274 }, { "epoch": 2.26, "grad_norm": 1.9884709849723492, "learning_rate": 1.5161577122607573e-06, "loss": 0.0402, "step": 8275 }, { "epoch": 2.26, "grad_norm": 1.3028556613529376, "learning_rate": 1.5151007981086657e-06, "loss": 0.0417, "step": 8276 }, { "epoch": 2.26, "grad_norm": 1.5348910211719016, "learning_rate": 1.5140441866876737e-06, "loss": 0.0442, "step": 8277 }, { "epoch": 2.26, "grad_norm": 1.3886215791758076, "learning_rate": 1.5129878780895674e-06, "loss": 0.0481, "step": 8278 }, { "epoch": 2.26, "grad_norm": 1.272547487762864, "learning_rate": 1.5119318724061105e-06, "loss": 0.0426, "step": 8279 }, { "epoch": 2.26, "grad_norm": 1.4731114345791578, "learning_rate": 1.5108761697290348e-06, "loss": 0.0497, "step": 8280 }, { "epoch": 2.26, "grad_norm": 1.3083630761844314, "learning_rate": 1.5098207701500511e-06, "loss": 0.0375, "step": 8281 }, { "epoch": 2.26, "grad_norm": 1.5699638342191817, "learning_rate": 1.5087656737608403e-06, "loss": 0.0483, "step": 8282 }, { "epoch": 2.26, "grad_norm": 1.498460961471617, "learning_rate": 1.5077108806530582e-06, "loss": 0.0433, "step": 8283 }, { "epoch": 2.26, "grad_norm": 1.5632110315905887, "learning_rate": 1.5066563909183318e-06, "loss": 0.0546, "step": 8284 }, { "epoch": 2.26, "grad_norm": 1.4392588062823115, "learning_rate": 1.5056022046482678e-06, "loss": 0.0486, "step": 8285 }, { "epoch": 2.26, "grad_norm": 1.4589594886235493, "learning_rate": 1.5045483219344387e-06, "loss": 0.04, "step": 8286 }, { "epoch": 2.26, "grad_norm": 1.5648431783414312, "learning_rate": 1.5034947428683988e-06, "loss": 0.0436, "step": 8287 }, { "epoch": 2.26, "grad_norm": 1.7609842689865909, "learning_rate": 1.5024414675416693e-06, "loss": 0.0459, "step": 8288 }, { "epoch": 2.26, "grad_norm": 1.350515146554619, "learning_rate": 1.5013884960457486e-06, "loss": 0.0397, "step": 8289 }, { "epoch": 2.26, "grad_norm": 1.3098330974189372, "learning_rate": 1.5003358284721053e-06, "loss": 0.0416, "step": 8290 }, { "epoch": 2.26, "grad_norm": 1.5036578497152129, "learning_rate": 1.499283464912188e-06, "loss": 0.0447, "step": 8291 }, { "epoch": 2.26, "grad_norm": 1.5651364818544082, "learning_rate": 1.498231405457411e-06, "loss": 0.0461, "step": 8292 }, { "epoch": 2.26, "grad_norm": 1.3121323365412014, "learning_rate": 1.4971796501991698e-06, "loss": 0.0386, "step": 8293 }, { "epoch": 2.26, "grad_norm": 1.5941825792596624, "learning_rate": 1.4961281992288273e-06, "loss": 0.0467, "step": 8294 }, { "epoch": 2.26, "grad_norm": 1.683937099982629, "learning_rate": 1.4950770526377233e-06, "loss": 0.0536, "step": 8295 }, { "epoch": 2.26, "grad_norm": 1.5409491534288338, "learning_rate": 1.4940262105171683e-06, "loss": 0.0424, "step": 8296 }, { "epoch": 2.27, "grad_norm": 1.5077052770456136, "learning_rate": 1.4929756729584517e-06, "loss": 0.0517, "step": 8297 }, { "epoch": 2.27, "grad_norm": 1.300047304475324, "learning_rate": 1.4919254400528293e-06, "loss": 0.0439, "step": 8298 }, { "epoch": 2.27, "grad_norm": 12.172105749993614, "learning_rate": 1.490875511891538e-06, "loss": 0.102, "step": 8299 }, { "epoch": 2.27, "grad_norm": 1.5024623483180286, "learning_rate": 1.4898258885657829e-06, "loss": 0.0479, "step": 8300 }, { "epoch": 2.27, "grad_norm": 1.4931677611979917, "learning_rate": 1.488776570166744e-06, "loss": 0.0488, "step": 8301 }, { "epoch": 2.27, "grad_norm": 1.4514742640496727, "learning_rate": 1.4877275567855726e-06, "loss": 0.047, "step": 8302 }, { "epoch": 2.27, "grad_norm": 1.4019400305553669, "learning_rate": 1.4866788485133988e-06, "loss": 0.0485, "step": 8303 }, { "epoch": 2.27, "grad_norm": 1.6287996964540272, "learning_rate": 1.4856304454413239e-06, "loss": 0.0506, "step": 8304 }, { "epoch": 2.27, "grad_norm": 1.4868805746551998, "learning_rate": 1.484582347660421e-06, "loss": 0.0531, "step": 8305 }, { "epoch": 2.27, "grad_norm": 1.4888731329030027, "learning_rate": 1.483534555261737e-06, "loss": 0.0494, "step": 8306 }, { "epoch": 2.27, "grad_norm": 1.4111030865181216, "learning_rate": 1.4824870683362919e-06, "loss": 0.0462, "step": 8307 }, { "epoch": 2.27, "grad_norm": 1.406357081249897, "learning_rate": 1.4814398869750835e-06, "loss": 0.0444, "step": 8308 }, { "epoch": 2.27, "grad_norm": 1.5717985211013366, "learning_rate": 1.4803930112690767e-06, "loss": 0.0522, "step": 8309 }, { "epoch": 2.27, "grad_norm": 1.4446290558389596, "learning_rate": 1.4793464413092161e-06, "loss": 0.0516, "step": 8310 }, { "epoch": 2.27, "grad_norm": 2.5742204561616804, "learning_rate": 1.4783001771864148e-06, "loss": 0.0519, "step": 8311 }, { "epoch": 2.27, "grad_norm": 1.7160132763569504, "learning_rate": 1.4772542189915607e-06, "loss": 0.0617, "step": 8312 }, { "epoch": 2.27, "grad_norm": 1.6815000844088648, "learning_rate": 1.4762085668155152e-06, "loss": 0.0477, "step": 8313 }, { "epoch": 2.27, "grad_norm": 2.0618387408373655, "learning_rate": 1.4751632207491156e-06, "loss": 0.0415, "step": 8314 }, { "epoch": 2.27, "grad_norm": 1.4048991861042903, "learning_rate": 1.4741181808831679e-06, "loss": 0.0479, "step": 8315 }, { "epoch": 2.27, "grad_norm": 1.2133614825597814, "learning_rate": 1.4730734473084568e-06, "loss": 0.0347, "step": 8316 }, { "epoch": 2.27, "grad_norm": 1.6134061828270194, "learning_rate": 1.4720290201157361e-06, "loss": 0.0554, "step": 8317 }, { "epoch": 2.27, "grad_norm": 1.3520837327594406, "learning_rate": 1.4709848993957348e-06, "loss": 0.0456, "step": 8318 }, { "epoch": 2.27, "grad_norm": 1.3233795649071949, "learning_rate": 1.4699410852391538e-06, "loss": 0.0458, "step": 8319 }, { "epoch": 2.27, "grad_norm": 1.5509257591050072, "learning_rate": 1.4688975777366716e-06, "loss": 0.0534, "step": 8320 }, { "epoch": 2.27, "grad_norm": 1.6362817497012556, "learning_rate": 1.4678543769789334e-06, "loss": 0.043, "step": 8321 }, { "epoch": 2.27, "grad_norm": 1.4749488016933494, "learning_rate": 1.4668114830565644e-06, "loss": 0.0447, "step": 8322 }, { "epoch": 2.27, "grad_norm": 1.4044337363804473, "learning_rate": 1.4657688960601595e-06, "loss": 0.0461, "step": 8323 }, { "epoch": 2.27, "grad_norm": 1.9998902185186296, "learning_rate": 1.4647266160802876e-06, "loss": 0.051, "step": 8324 }, { "epoch": 2.27, "grad_norm": 1.531686299491867, "learning_rate": 1.4636846432074885e-06, "loss": 0.0465, "step": 8325 }, { "epoch": 2.27, "grad_norm": 1.4807920919029836, "learning_rate": 1.4626429775322816e-06, "loss": 0.0489, "step": 8326 }, { "epoch": 2.27, "grad_norm": 1.6165714203585164, "learning_rate": 1.4616016191451522e-06, "loss": 0.0483, "step": 8327 }, { "epoch": 2.27, "grad_norm": 1.6709100672648618, "learning_rate": 1.4605605681365658e-06, "loss": 0.0511, "step": 8328 }, { "epoch": 2.27, "grad_norm": 1.6902104631480812, "learning_rate": 1.459519824596956e-06, "loss": 0.052, "step": 8329 }, { "epoch": 2.27, "grad_norm": 1.4895467800675666, "learning_rate": 1.4584793886167326e-06, "loss": 0.0427, "step": 8330 }, { "epoch": 2.27, "grad_norm": 1.9864664994658, "learning_rate": 1.4574392602862746e-06, "loss": 0.052, "step": 8331 }, { "epoch": 2.27, "grad_norm": 1.3695823961156617, "learning_rate": 1.4563994396959419e-06, "loss": 0.0469, "step": 8332 }, { "epoch": 2.27, "grad_norm": 1.4316052015152396, "learning_rate": 1.455359926936059e-06, "loss": 0.0437, "step": 8333 }, { "epoch": 2.28, "grad_norm": 1.8415366814078669, "learning_rate": 1.4543207220969308e-06, "loss": 0.0499, "step": 8334 }, { "epoch": 2.28, "grad_norm": 1.5702074325399997, "learning_rate": 1.453281825268832e-06, "loss": 0.0462, "step": 8335 }, { "epoch": 2.28, "grad_norm": 1.540584543189919, "learning_rate": 1.4522432365420092e-06, "loss": 0.0477, "step": 8336 }, { "epoch": 2.28, "grad_norm": 1.407507528222732, "learning_rate": 1.4512049560066837e-06, "loss": 0.0451, "step": 8337 }, { "epoch": 2.28, "grad_norm": 1.9855033825595423, "learning_rate": 1.4501669837530535e-06, "loss": 0.0646, "step": 8338 }, { "epoch": 2.28, "grad_norm": 1.4180086107954244, "learning_rate": 1.4491293198712824e-06, "loss": 0.0447, "step": 8339 }, { "epoch": 2.28, "grad_norm": 1.331053280706459, "learning_rate": 1.4480919644515156e-06, "loss": 0.0461, "step": 8340 }, { "epoch": 2.28, "grad_norm": 1.5193673526707658, "learning_rate": 1.447054917583866e-06, "loss": 0.0495, "step": 8341 }, { "epoch": 2.28, "grad_norm": 1.300157323420643, "learning_rate": 1.4460181793584211e-06, "loss": 0.0418, "step": 8342 }, { "epoch": 2.28, "grad_norm": 1.5097993982684867, "learning_rate": 1.4449817498652402e-06, "loss": 0.0402, "step": 8343 }, { "epoch": 2.28, "grad_norm": 1.6630864133225698, "learning_rate": 1.4439456291943605e-06, "loss": 0.0499, "step": 8344 }, { "epoch": 2.28, "grad_norm": 1.4299568979547996, "learning_rate": 1.4429098174357852e-06, "loss": 0.0552, "step": 8345 }, { "epoch": 2.28, "grad_norm": 1.7279825954082573, "learning_rate": 1.4418743146794988e-06, "loss": 0.0522, "step": 8346 }, { "epoch": 2.28, "grad_norm": 1.4154458776971695, "learning_rate": 1.4408391210154532e-06, "loss": 0.0397, "step": 8347 }, { "epoch": 2.28, "grad_norm": 1.8938103248443001, "learning_rate": 1.4398042365335745e-06, "loss": 0.0436, "step": 8348 }, { "epoch": 2.28, "grad_norm": 1.7226860280165233, "learning_rate": 1.4387696613237612e-06, "loss": 0.0554, "step": 8349 }, { "epoch": 2.28, "grad_norm": 1.3466427262747704, "learning_rate": 1.4377353954758893e-06, "loss": 0.0415, "step": 8350 }, { "epoch": 2.28, "grad_norm": 1.3161238505774129, "learning_rate": 1.4367014390798023e-06, "loss": 0.041, "step": 8351 }, { "epoch": 2.28, "grad_norm": 1.5900747019268302, "learning_rate": 1.4356677922253215e-06, "loss": 0.0477, "step": 8352 }, { "epoch": 2.28, "grad_norm": 1.32011524623867, "learning_rate": 1.4346344550022384e-06, "loss": 0.0442, "step": 8353 }, { "epoch": 2.28, "grad_norm": 1.3693774615991687, "learning_rate": 1.433601427500318e-06, "loss": 0.0434, "step": 8354 }, { "epoch": 2.28, "grad_norm": 1.7667166268277283, "learning_rate": 1.4325687098092967e-06, "loss": 0.0532, "step": 8355 }, { "epoch": 2.28, "grad_norm": 1.6198050635260666, "learning_rate": 1.4315363020188905e-06, "loss": 0.0535, "step": 8356 }, { "epoch": 2.28, "grad_norm": 1.5357686740989378, "learning_rate": 1.43050420421878e-06, "loss": 0.042, "step": 8357 }, { "epoch": 2.28, "grad_norm": 1.456401216956577, "learning_rate": 1.4294724164986262e-06, "loss": 0.0562, "step": 8358 }, { "epoch": 2.28, "grad_norm": 1.1297419015709385, "learning_rate": 1.428440938948058e-06, "loss": 0.0396, "step": 8359 }, { "epoch": 2.28, "grad_norm": 1.184290446734807, "learning_rate": 1.4274097716566804e-06, "loss": 0.0361, "step": 8360 }, { "epoch": 2.28, "grad_norm": 1.4144277719985296, "learning_rate": 1.4263789147140672e-06, "loss": 0.0471, "step": 8361 }, { "epoch": 2.28, "grad_norm": 1.352838176835097, "learning_rate": 1.4253483682097724e-06, "loss": 0.0464, "step": 8362 }, { "epoch": 2.28, "grad_norm": 1.5941458067979706, "learning_rate": 1.424318132233316e-06, "loss": 0.0463, "step": 8363 }, { "epoch": 2.28, "grad_norm": 1.238919348327796, "learning_rate": 1.423288206874196e-06, "loss": 0.0357, "step": 8364 }, { "epoch": 2.28, "grad_norm": 1.3419119425218575, "learning_rate": 1.4222585922218812e-06, "loss": 0.0426, "step": 8365 }, { "epoch": 2.28, "grad_norm": 1.4758226842481141, "learning_rate": 1.4212292883658123e-06, "loss": 0.0495, "step": 8366 }, { "epoch": 2.28, "grad_norm": 1.6322923517552461, "learning_rate": 1.4202002953954042e-06, "loss": 0.0507, "step": 8367 }, { "epoch": 2.28, "grad_norm": 1.4633052185354565, "learning_rate": 1.4191716134000466e-06, "loss": 0.0499, "step": 8368 }, { "epoch": 2.28, "grad_norm": 1.7650050568315068, "learning_rate": 1.4181432424690978e-06, "loss": 0.0483, "step": 8369 }, { "epoch": 2.29, "grad_norm": 1.6427714555071173, "learning_rate": 1.4171151826918954e-06, "loss": 0.0477, "step": 8370 }, { "epoch": 2.29, "grad_norm": 1.4896222721922217, "learning_rate": 1.4160874341577447e-06, "loss": 0.0511, "step": 8371 }, { "epoch": 2.29, "grad_norm": 1.484449023956297, "learning_rate": 1.4150599969559247e-06, "loss": 0.0414, "step": 8372 }, { "epoch": 2.29, "grad_norm": 1.7047880114514589, "learning_rate": 1.4140328711756878e-06, "loss": 0.0544, "step": 8373 }, { "epoch": 2.29, "grad_norm": 1.5009777808506528, "learning_rate": 1.4130060569062626e-06, "loss": 0.0535, "step": 8374 }, { "epoch": 2.29, "grad_norm": 1.6048544200299082, "learning_rate": 1.4119795542368441e-06, "loss": 0.0481, "step": 8375 }, { "epoch": 2.29, "grad_norm": 1.5585570352010696, "learning_rate": 1.410953363256608e-06, "loss": 0.0508, "step": 8376 }, { "epoch": 2.29, "grad_norm": 1.3799036939663358, "learning_rate": 1.409927484054696e-06, "loss": 0.0441, "step": 8377 }, { "epoch": 2.29, "grad_norm": 1.5925558622322644, "learning_rate": 1.4089019167202278e-06, "loss": 0.0505, "step": 8378 }, { "epoch": 2.29, "grad_norm": 1.3611386914926655, "learning_rate": 1.40787666134229e-06, "loss": 0.0443, "step": 8379 }, { "epoch": 2.29, "grad_norm": 1.6997664744256886, "learning_rate": 1.4068517180099505e-06, "loss": 0.0539, "step": 8380 }, { "epoch": 2.29, "grad_norm": 1.5498846553070156, "learning_rate": 1.4058270868122414e-06, "loss": 0.048, "step": 8381 }, { "epoch": 2.29, "grad_norm": 1.7127464722234402, "learning_rate": 1.404802767838176e-06, "loss": 0.0512, "step": 8382 }, { "epoch": 2.29, "grad_norm": 1.4403881440700712, "learning_rate": 1.403778761176734e-06, "loss": 0.0386, "step": 8383 }, { "epoch": 2.29, "grad_norm": 1.3583720807686566, "learning_rate": 1.40275506691687e-06, "loss": 0.0461, "step": 8384 }, { "epoch": 2.29, "grad_norm": 1.5222095081602156, "learning_rate": 1.4017316851475105e-06, "loss": 0.0436, "step": 8385 }, { "epoch": 2.29, "grad_norm": 1.521601829147279, "learning_rate": 1.4007086159575595e-06, "loss": 0.0436, "step": 8386 }, { "epoch": 2.29, "grad_norm": 1.4458539856113788, "learning_rate": 1.399685859435887e-06, "loss": 0.046, "step": 8387 }, { "epoch": 2.29, "grad_norm": 1.3888603809176023, "learning_rate": 1.3986634156713418e-06, "loss": 0.0476, "step": 8388 }, { "epoch": 2.29, "grad_norm": 1.4381609684188719, "learning_rate": 1.3976412847527427e-06, "loss": 0.0483, "step": 8389 }, { "epoch": 2.29, "grad_norm": 1.6570854948404339, "learning_rate": 1.3966194667688804e-06, "loss": 0.0446, "step": 8390 }, { "epoch": 2.29, "grad_norm": 1.5305075610106071, "learning_rate": 1.3955979618085185e-06, "loss": 0.0473, "step": 8391 }, { "epoch": 2.29, "grad_norm": 1.4948790962246123, "learning_rate": 1.394576769960398e-06, "loss": 0.0526, "step": 8392 }, { "epoch": 2.29, "grad_norm": 1.4374064108066564, "learning_rate": 1.3935558913132252e-06, "loss": 0.0446, "step": 8393 }, { "epoch": 2.29, "grad_norm": 1.5177200705284128, "learning_rate": 1.3925353259556873e-06, "loss": 0.0487, "step": 8394 }, { "epoch": 2.29, "grad_norm": 1.530365334909462, "learning_rate": 1.3915150739764383e-06, "loss": 0.0439, "step": 8395 }, { "epoch": 2.29, "grad_norm": 1.7696838517185172, "learning_rate": 1.390495135464105e-06, "loss": 0.0596, "step": 8396 }, { "epoch": 2.29, "grad_norm": 1.4838624229345398, "learning_rate": 1.3894755105072922e-06, "loss": 0.0443, "step": 8397 }, { "epoch": 2.29, "grad_norm": 1.3905998379468776, "learning_rate": 1.388456199194571e-06, "loss": 0.049, "step": 8398 }, { "epoch": 2.29, "grad_norm": 1.4161479350346862, "learning_rate": 1.3874372016144915e-06, "loss": 0.044, "step": 8399 }, { "epoch": 2.29, "grad_norm": 2.5687670441615165, "learning_rate": 1.3864185178555722e-06, "loss": 0.0645, "step": 8400 }, { "epoch": 2.29, "grad_norm": 1.3348445935435789, "learning_rate": 1.3854001480063045e-06, "loss": 0.0441, "step": 8401 }, { "epoch": 2.29, "grad_norm": 1.4346253356636294, "learning_rate": 1.3843820921551532e-06, "loss": 0.0423, "step": 8402 }, { "epoch": 2.29, "grad_norm": 1.591177346691786, "learning_rate": 1.3833643503905587e-06, "loss": 0.0546, "step": 8403 }, { "epoch": 2.29, "grad_norm": 1.6276614540571444, "learning_rate": 1.3823469228009284e-06, "loss": 0.0461, "step": 8404 }, { "epoch": 2.29, "grad_norm": 1.4627172050713082, "learning_rate": 1.3813298094746491e-06, "loss": 0.0489, "step": 8405 }, { "epoch": 2.29, "grad_norm": 1.3288218497670228, "learning_rate": 1.380313010500075e-06, "loss": 0.043, "step": 8406 }, { "epoch": 2.3, "grad_norm": 1.617531211939985, "learning_rate": 1.379296525965535e-06, "loss": 0.0502, "step": 8407 }, { "epoch": 2.3, "grad_norm": 1.674245561824862, "learning_rate": 1.3782803559593288e-06, "loss": 0.0461, "step": 8408 }, { "epoch": 2.3, "grad_norm": 1.5522588077515165, "learning_rate": 1.3772645005697337e-06, "loss": 0.047, "step": 8409 }, { "epoch": 2.3, "grad_norm": 1.6345117720229083, "learning_rate": 1.3762489598849937e-06, "loss": 0.0498, "step": 8410 }, { "epoch": 2.3, "grad_norm": 1.4953551944731487, "learning_rate": 1.3752337339933308e-06, "loss": 0.0426, "step": 8411 }, { "epoch": 2.3, "grad_norm": 1.3703123107656219, "learning_rate": 1.3742188229829351e-06, "loss": 0.0461, "step": 8412 }, { "epoch": 2.3, "grad_norm": 1.6992285160849898, "learning_rate": 1.3732042269419721e-06, "loss": 0.0524, "step": 8413 }, { "epoch": 2.3, "grad_norm": 1.334320971273496, "learning_rate": 1.3721899459585775e-06, "loss": 0.0394, "step": 8414 }, { "epoch": 2.3, "grad_norm": 1.414424594508028, "learning_rate": 1.371175980120864e-06, "loss": 0.0428, "step": 8415 }, { "epoch": 2.3, "grad_norm": 1.4242672219815427, "learning_rate": 1.3701623295169115e-06, "loss": 0.047, "step": 8416 }, { "epoch": 2.3, "grad_norm": 1.457491914156786, "learning_rate": 1.369148994234778e-06, "loss": 0.0507, "step": 8417 }, { "epoch": 2.3, "grad_norm": 1.2359557515047375, "learning_rate": 1.36813597436249e-06, "loss": 0.0368, "step": 8418 }, { "epoch": 2.3, "grad_norm": 1.2911869016831212, "learning_rate": 1.3671232699880477e-06, "loss": 0.0406, "step": 8419 }, { "epoch": 2.3, "grad_norm": 1.484748242976053, "learning_rate": 1.3661108811994228e-06, "loss": 0.0524, "step": 8420 }, { "epoch": 2.3, "grad_norm": 1.3895683804827463, "learning_rate": 1.365098808084564e-06, "loss": 0.0446, "step": 8421 }, { "epoch": 2.3, "grad_norm": 1.4170835967344488, "learning_rate": 1.3640870507313859e-06, "loss": 0.0405, "step": 8422 }, { "epoch": 2.3, "grad_norm": 1.6071124760191486, "learning_rate": 1.363075609227783e-06, "loss": 0.0473, "step": 8423 }, { "epoch": 2.3, "grad_norm": 2.1428289128589673, "learning_rate": 1.362064483661617e-06, "loss": 0.0453, "step": 8424 }, { "epoch": 2.3, "grad_norm": 1.873356745272441, "learning_rate": 1.3610536741207237e-06, "loss": 0.0574, "step": 8425 }, { "epoch": 2.3, "grad_norm": 1.5425176026557368, "learning_rate": 1.3600431806929092e-06, "loss": 0.0459, "step": 8426 }, { "epoch": 2.3, "grad_norm": 1.5886468194550611, "learning_rate": 1.3590330034659588e-06, "loss": 0.0427, "step": 8427 }, { "epoch": 2.3, "grad_norm": 1.3648565176668856, "learning_rate": 1.3580231425276224e-06, "loss": 0.0433, "step": 8428 }, { "epoch": 2.3, "grad_norm": 1.7523541361556187, "learning_rate": 1.3570135979656285e-06, "loss": 0.0458, "step": 8429 }, { "epoch": 2.3, "grad_norm": 1.6013968146494437, "learning_rate": 1.356004369867675e-06, "loss": 0.0541, "step": 8430 }, { "epoch": 2.3, "grad_norm": 1.5824063498560141, "learning_rate": 1.354995458321432e-06, "loss": 0.0445, "step": 8431 }, { "epoch": 2.3, "grad_norm": 1.3884894606982499, "learning_rate": 1.3539868634145425e-06, "loss": 0.0413, "step": 8432 }, { "epoch": 2.3, "grad_norm": 1.5887128111742896, "learning_rate": 1.352978585234625e-06, "loss": 0.0471, "step": 8433 }, { "epoch": 2.3, "grad_norm": 1.5234331807270978, "learning_rate": 1.3519706238692654e-06, "loss": 0.0424, "step": 8434 }, { "epoch": 2.3, "grad_norm": 1.5269440177301963, "learning_rate": 1.3509629794060269e-06, "loss": 0.0528, "step": 8435 }, { "epoch": 2.3, "grad_norm": 1.6203135430157731, "learning_rate": 1.3499556519324424e-06, "loss": 0.0462, "step": 8436 }, { "epoch": 2.3, "grad_norm": 1.4982311276058105, "learning_rate": 1.3489486415360175e-06, "loss": 0.05, "step": 8437 }, { "epoch": 2.3, "grad_norm": 1.6381407859014134, "learning_rate": 1.3479419483042288e-06, "loss": 0.0444, "step": 8438 }, { "epoch": 2.3, "grad_norm": 1.6729275348263937, "learning_rate": 1.3469355723245303e-06, "loss": 0.0602, "step": 8439 }, { "epoch": 2.3, "grad_norm": 1.312937306709134, "learning_rate": 1.3459295136843426e-06, "loss": 0.0425, "step": 8440 }, { "epoch": 2.3, "grad_norm": 1.4151699155857165, "learning_rate": 1.344923772471064e-06, "loss": 0.0375, "step": 8441 }, { "epoch": 2.3, "grad_norm": 1.6076921729086373, "learning_rate": 1.3439183487720608e-06, "loss": 0.0389, "step": 8442 }, { "epoch": 2.3, "grad_norm": 1.7502260076523841, "learning_rate": 1.3429132426746743e-06, "loss": 0.0542, "step": 8443 }, { "epoch": 2.31, "grad_norm": 1.3759592163463548, "learning_rate": 1.3419084542662159e-06, "loss": 0.0436, "step": 8444 }, { "epoch": 2.31, "grad_norm": 1.3667868670914456, "learning_rate": 1.3409039836339738e-06, "loss": 0.0413, "step": 8445 }, { "epoch": 2.31, "grad_norm": 1.430006305991176, "learning_rate": 1.3398998308652027e-06, "loss": 0.0464, "step": 8446 }, { "epoch": 2.31, "grad_norm": 1.4000544687170169, "learning_rate": 1.3388959960471354e-06, "loss": 0.0471, "step": 8447 }, { "epoch": 2.31, "grad_norm": 1.5205588386122524, "learning_rate": 1.337892479266974e-06, "loss": 0.0485, "step": 8448 }, { "epoch": 2.31, "grad_norm": 1.547599164751151, "learning_rate": 1.336889280611892e-06, "loss": 0.0461, "step": 8449 }, { "epoch": 2.31, "grad_norm": 1.4917334250993264, "learning_rate": 1.3358864001690358e-06, "loss": 0.0483, "step": 8450 }, { "epoch": 2.31, "grad_norm": 1.3226379052393897, "learning_rate": 1.3348838380255287e-06, "loss": 0.0351, "step": 8451 }, { "epoch": 2.31, "grad_norm": 1.4735105183595614, "learning_rate": 1.3338815942684586e-06, "loss": 0.0393, "step": 8452 }, { "epoch": 2.31, "grad_norm": 1.3411039447895312, "learning_rate": 1.3328796689848932e-06, "loss": 0.0429, "step": 8453 }, { "epoch": 2.31, "grad_norm": 1.467414501587144, "learning_rate": 1.3318780622618682e-06, "loss": 0.039, "step": 8454 }, { "epoch": 2.31, "grad_norm": 1.5796853165072808, "learning_rate": 1.3308767741863916e-06, "loss": 0.0468, "step": 8455 }, { "epoch": 2.31, "grad_norm": 1.4478500287379872, "learning_rate": 1.3298758048454436e-06, "loss": 0.04, "step": 8456 }, { "epoch": 2.31, "grad_norm": 1.7639759433878386, "learning_rate": 1.3288751543259814e-06, "loss": 0.0506, "step": 8457 }, { "epoch": 2.31, "grad_norm": 1.7069931454481615, "learning_rate": 1.327874822714927e-06, "loss": 0.0562, "step": 8458 }, { "epoch": 2.31, "grad_norm": 1.4785677746262984, "learning_rate": 1.3268748100991819e-06, "loss": 0.0471, "step": 8459 }, { "epoch": 2.31, "grad_norm": 1.6479370001976672, "learning_rate": 1.3258751165656154e-06, "loss": 0.0491, "step": 8460 }, { "epoch": 2.31, "grad_norm": 1.907082005923064, "learning_rate": 1.32487574220107e-06, "loss": 0.0522, "step": 8461 }, { "epoch": 2.31, "grad_norm": 1.317951193676135, "learning_rate": 1.3238766870923592e-06, "loss": 0.041, "step": 8462 }, { "epoch": 2.31, "grad_norm": 1.5725227842357739, "learning_rate": 1.3228779513262735e-06, "loss": 0.0514, "step": 8463 }, { "epoch": 2.31, "grad_norm": 1.3940749983392537, "learning_rate": 1.3218795349895696e-06, "loss": 0.0428, "step": 8464 }, { "epoch": 2.31, "grad_norm": 1.7230539461782544, "learning_rate": 1.3208814381689822e-06, "loss": 0.0508, "step": 8465 }, { "epoch": 2.31, "grad_norm": 1.4873764065491668, "learning_rate": 1.3198836609512134e-06, "loss": 0.0466, "step": 8466 }, { "epoch": 2.31, "grad_norm": 1.5750390945745487, "learning_rate": 1.3188862034229405e-06, "loss": 0.0504, "step": 8467 }, { "epoch": 2.31, "grad_norm": 2.0003054516575274, "learning_rate": 1.3178890656708094e-06, "loss": 0.0564, "step": 8468 }, { "epoch": 2.31, "grad_norm": 1.5316559571103365, "learning_rate": 1.3168922477814444e-06, "loss": 0.0545, "step": 8469 }, { "epoch": 2.31, "grad_norm": 1.4546673889450077, "learning_rate": 1.315895749841436e-06, "loss": 0.0367, "step": 8470 }, { "epoch": 2.31, "grad_norm": 1.2558656215709567, "learning_rate": 1.3148995719373514e-06, "loss": 0.0384, "step": 8471 }, { "epoch": 2.31, "grad_norm": 1.3404345812948024, "learning_rate": 1.313903714155727e-06, "loss": 0.0485, "step": 8472 }, { "epoch": 2.31, "grad_norm": 1.3450725133900352, "learning_rate": 1.3129081765830725e-06, "loss": 0.0396, "step": 8473 }, { "epoch": 2.31, "grad_norm": 1.663221008325269, "learning_rate": 1.3119129593058676e-06, "loss": 0.0492, "step": 8474 }, { "epoch": 2.31, "grad_norm": 1.5002123392750324, "learning_rate": 1.3109180624105699e-06, "loss": 0.0494, "step": 8475 }, { "epoch": 2.31, "grad_norm": 1.2717798296710185, "learning_rate": 1.3099234859836019e-06, "loss": 0.0389, "step": 8476 }, { "epoch": 2.31, "grad_norm": 1.4234441191604637, "learning_rate": 1.3089292301113654e-06, "loss": 0.0455, "step": 8477 }, { "epoch": 2.31, "grad_norm": 1.3760897869864137, "learning_rate": 1.3079352948802294e-06, "loss": 0.0426, "step": 8478 }, { "epoch": 2.31, "grad_norm": 1.4137062615995946, "learning_rate": 1.3069416803765355e-06, "loss": 0.0391, "step": 8479 }, { "epoch": 2.32, "grad_norm": 1.4897742397366038, "learning_rate": 1.3059483866865973e-06, "loss": 0.0502, "step": 8480 }, { "epoch": 2.32, "grad_norm": 1.494539919238236, "learning_rate": 1.3049554138967052e-06, "loss": 0.0439, "step": 8481 }, { "epoch": 2.32, "grad_norm": 1.395305316581814, "learning_rate": 1.303962762093115e-06, "loss": 0.0474, "step": 8482 }, { "epoch": 2.32, "grad_norm": 1.3996936503578319, "learning_rate": 1.30297043136206e-06, "loss": 0.0472, "step": 8483 }, { "epoch": 2.32, "grad_norm": 1.4870434914698234, "learning_rate": 1.3019784217897423e-06, "loss": 0.0482, "step": 8484 }, { "epoch": 2.32, "grad_norm": 1.5743919203152352, "learning_rate": 1.3009867334623383e-06, "loss": 0.0519, "step": 8485 }, { "epoch": 2.32, "grad_norm": 1.3549515463752877, "learning_rate": 1.299995366465992e-06, "loss": 0.0441, "step": 8486 }, { "epoch": 2.32, "grad_norm": 1.6221175382632504, "learning_rate": 1.2990043208868253e-06, "loss": 0.0555, "step": 8487 }, { "epoch": 2.32, "grad_norm": 1.3421737233564357, "learning_rate": 1.2980135968109314e-06, "loss": 0.0423, "step": 8488 }, { "epoch": 2.32, "grad_norm": 1.2774057437083652, "learning_rate": 1.2970231943243716e-06, "loss": 0.042, "step": 8489 }, { "epoch": 2.32, "grad_norm": 1.3371086568778108, "learning_rate": 1.2960331135131826e-06, "loss": 0.0477, "step": 8490 }, { "epoch": 2.32, "grad_norm": 1.6058614752109275, "learning_rate": 1.29504335446337e-06, "loss": 0.0545, "step": 8491 }, { "epoch": 2.32, "grad_norm": 1.6995573501756085, "learning_rate": 1.2940539172609167e-06, "loss": 0.0519, "step": 8492 }, { "epoch": 2.32, "grad_norm": 1.6265437216382979, "learning_rate": 1.2930648019917719e-06, "loss": 0.0484, "step": 8493 }, { "epoch": 2.32, "grad_norm": 1.6344399301009649, "learning_rate": 1.2920760087418616e-06, "loss": 0.0498, "step": 8494 }, { "epoch": 2.32, "grad_norm": 1.5905380987075208, "learning_rate": 1.291087537597081e-06, "loss": 0.0535, "step": 8495 }, { "epoch": 2.32, "grad_norm": 1.522136909110268, "learning_rate": 1.2900993886432972e-06, "loss": 0.0527, "step": 8496 }, { "epoch": 2.32, "grad_norm": 1.6061318414981354, "learning_rate": 1.2891115619663496e-06, "loss": 0.0607, "step": 8497 }, { "epoch": 2.32, "grad_norm": 1.4407859489344852, "learning_rate": 1.288124057652052e-06, "loss": 0.0465, "step": 8498 }, { "epoch": 2.32, "grad_norm": 1.5940558056846568, "learning_rate": 1.2871368757861863e-06, "loss": 0.0522, "step": 8499 }, { "epoch": 2.32, "grad_norm": 1.4064833018887997, "learning_rate": 1.286150016454511e-06, "loss": 0.0426, "step": 8500 }, { "epoch": 2.32, "grad_norm": 1.4171605114573003, "learning_rate": 1.285163479742752e-06, "loss": 0.0481, "step": 8501 }, { "epoch": 2.32, "grad_norm": 1.3178897549524387, "learning_rate": 1.2841772657366103e-06, "loss": 0.0405, "step": 8502 }, { "epoch": 2.32, "grad_norm": 1.4766507787415275, "learning_rate": 1.283191374521755e-06, "loss": 0.0457, "step": 8503 }, { "epoch": 2.32, "grad_norm": 1.7000557818821727, "learning_rate": 1.2822058061838333e-06, "loss": 0.0515, "step": 8504 }, { "epoch": 2.32, "grad_norm": 1.5439167060505428, "learning_rate": 1.2812205608084582e-06, "loss": 0.0522, "step": 8505 }, { "epoch": 2.32, "grad_norm": 1.5870895114604513, "learning_rate": 1.2802356384812203e-06, "loss": 0.0508, "step": 8506 }, { "epoch": 2.32, "grad_norm": 1.6451203205736327, "learning_rate": 1.2792510392876777e-06, "loss": 0.0504, "step": 8507 }, { "epoch": 2.32, "grad_norm": 1.2243738082045, "learning_rate": 1.2782667633133617e-06, "loss": 0.0443, "step": 8508 }, { "epoch": 2.32, "grad_norm": 1.418898319882461, "learning_rate": 1.277282810643774e-06, "loss": 0.042, "step": 8509 }, { "epoch": 2.32, "grad_norm": 1.1158275519841194, "learning_rate": 1.2762991813643938e-06, "loss": 0.0349, "step": 8510 }, { "epoch": 2.32, "grad_norm": 1.3366414155435924, "learning_rate": 1.2753158755606649e-06, "loss": 0.0456, "step": 8511 }, { "epoch": 2.32, "grad_norm": 1.5914447543491548, "learning_rate": 1.2743328933180099e-06, "loss": 0.0475, "step": 8512 }, { "epoch": 2.32, "grad_norm": 1.1600521540835773, "learning_rate": 1.2733502347218174e-06, "loss": 0.0348, "step": 8513 }, { "epoch": 2.32, "grad_norm": 1.5148091858485222, "learning_rate": 1.2723678998574512e-06, "loss": 0.0521, "step": 8514 }, { "epoch": 2.32, "grad_norm": 1.3473752828762064, "learning_rate": 1.271385888810245e-06, "loss": 0.0487, "step": 8515 }, { "epoch": 2.32, "grad_norm": 1.6546302300001483, "learning_rate": 1.270404201665507e-06, "loss": 0.0484, "step": 8516 }, { "epoch": 2.33, "grad_norm": 1.3633386331234514, "learning_rate": 1.2694228385085144e-06, "loss": 0.0422, "step": 8517 }, { "epoch": 2.33, "grad_norm": 1.5336970750911085, "learning_rate": 1.2684417994245197e-06, "loss": 0.0432, "step": 8518 }, { "epoch": 2.33, "grad_norm": 1.5603455240822794, "learning_rate": 1.267461084498744e-06, "loss": 0.0451, "step": 8519 }, { "epoch": 2.33, "grad_norm": 1.8762383812560237, "learning_rate": 1.2664806938163816e-06, "loss": 0.0554, "step": 8520 }, { "epoch": 2.33, "grad_norm": 1.28453381011712, "learning_rate": 1.2655006274625959e-06, "loss": 0.0389, "step": 8521 }, { "epoch": 2.33, "grad_norm": 1.428045596548661, "learning_rate": 1.2645208855225289e-06, "loss": 0.0406, "step": 8522 }, { "epoch": 2.33, "grad_norm": 1.9319783469996132, "learning_rate": 1.263541468081287e-06, "loss": 0.0572, "step": 8523 }, { "epoch": 2.33, "grad_norm": 1.7249688951400397, "learning_rate": 1.262562375223954e-06, "loss": 0.0549, "step": 8524 }, { "epoch": 2.33, "grad_norm": 1.4468872876757757, "learning_rate": 1.2615836070355824e-06, "loss": 0.0486, "step": 8525 }, { "epoch": 2.33, "grad_norm": 1.8849498713579076, "learning_rate": 1.2606051636011963e-06, "loss": 0.0551, "step": 8526 }, { "epoch": 2.33, "grad_norm": 1.6715171523833798, "learning_rate": 1.2596270450057917e-06, "loss": 0.0445, "step": 8527 }, { "epoch": 2.33, "grad_norm": 1.375410485003809, "learning_rate": 1.2586492513343395e-06, "loss": 0.0444, "step": 8528 }, { "epoch": 2.33, "grad_norm": 1.4248116454668638, "learning_rate": 1.2576717826717782e-06, "loss": 0.0519, "step": 8529 }, { "epoch": 2.33, "grad_norm": 1.5661791644275904, "learning_rate": 1.2566946391030222e-06, "loss": 0.0483, "step": 8530 }, { "epoch": 2.33, "grad_norm": 1.4570841787472775, "learning_rate": 1.2557178207129533e-06, "loss": 0.045, "step": 8531 }, { "epoch": 2.33, "grad_norm": 1.4426313862710176, "learning_rate": 1.254741327586428e-06, "loss": 0.0442, "step": 8532 }, { "epoch": 2.33, "grad_norm": 1.2683220277439153, "learning_rate": 1.2537651598082718e-06, "loss": 0.039, "step": 8533 }, { "epoch": 2.33, "grad_norm": 1.6782206136657742, "learning_rate": 1.2527893174632872e-06, "loss": 0.0546, "step": 8534 }, { "epoch": 2.33, "grad_norm": 1.734121312462512, "learning_rate": 1.2518138006362413e-06, "loss": 0.0551, "step": 8535 }, { "epoch": 2.33, "grad_norm": 1.501804140901601, "learning_rate": 1.25083860941188e-06, "loss": 0.0459, "step": 8536 }, { "epoch": 2.33, "grad_norm": 1.6752448574634988, "learning_rate": 1.2498637438749162e-06, "loss": 0.0499, "step": 8537 }, { "epoch": 2.33, "grad_norm": 1.5116942502817403, "learning_rate": 1.2488892041100364e-06, "loss": 0.047, "step": 8538 }, { "epoch": 2.33, "grad_norm": 1.6083312456738155, "learning_rate": 1.2479149902018955e-06, "loss": 0.0395, "step": 8539 }, { "epoch": 2.33, "grad_norm": 1.6663518167753921, "learning_rate": 1.2469411022351273e-06, "loss": 0.0488, "step": 8540 }, { "epoch": 2.33, "grad_norm": 1.50799782759602, "learning_rate": 1.245967540294329e-06, "loss": 0.0478, "step": 8541 }, { "epoch": 2.33, "grad_norm": 1.3063215056198543, "learning_rate": 1.244994304464076e-06, "loss": 0.0429, "step": 8542 }, { "epoch": 2.33, "grad_norm": 1.2989047598794272, "learning_rate": 1.2440213948289121e-06, "loss": 0.0421, "step": 8543 }, { "epoch": 2.33, "grad_norm": 1.7475331138053989, "learning_rate": 1.243048811473353e-06, "loss": 0.0524, "step": 8544 }, { "epoch": 2.33, "grad_norm": 1.7630693986406565, "learning_rate": 1.2420765544818847e-06, "loss": 0.0583, "step": 8545 }, { "epoch": 2.33, "grad_norm": 1.579089601300963, "learning_rate": 1.2411046239389701e-06, "loss": 0.0464, "step": 8546 }, { "epoch": 2.33, "grad_norm": 1.540150314642911, "learning_rate": 1.2401330199290368e-06, "loss": 0.0444, "step": 8547 }, { "epoch": 2.33, "grad_norm": 1.1953884400127959, "learning_rate": 1.2391617425364904e-06, "loss": 0.0355, "step": 8548 }, { "epoch": 2.33, "grad_norm": 1.37445388901159, "learning_rate": 1.2381907918457042e-06, "loss": 0.0483, "step": 8549 }, { "epoch": 2.33, "grad_norm": 1.754158845305628, "learning_rate": 1.2372201679410233e-06, "loss": 0.0436, "step": 8550 }, { "epoch": 2.33, "grad_norm": 1.796609485108871, "learning_rate": 1.236249870906765e-06, "loss": 0.0429, "step": 8551 }, { "epoch": 2.33, "grad_norm": 1.2562714141270679, "learning_rate": 1.2352799008272198e-06, "loss": 0.0386, "step": 8552 }, { "epoch": 2.33, "grad_norm": 1.4240908051816248, "learning_rate": 1.2343102577866467e-06, "loss": 0.0401, "step": 8553 }, { "epoch": 2.34, "grad_norm": 1.5004670950701247, "learning_rate": 1.2333409418692804e-06, "loss": 0.0441, "step": 8554 }, { "epoch": 2.34, "grad_norm": 1.4212615492423462, "learning_rate": 1.2323719531593236e-06, "loss": 0.0433, "step": 8555 }, { "epoch": 2.34, "grad_norm": 1.5654421741205267, "learning_rate": 1.2314032917409513e-06, "loss": 0.0556, "step": 8556 }, { "epoch": 2.34, "grad_norm": 1.6395034598891283, "learning_rate": 1.2304349576983094e-06, "loss": 0.0599, "step": 8557 }, { "epoch": 2.34, "grad_norm": 1.386590966855871, "learning_rate": 1.2294669511155193e-06, "loss": 0.0428, "step": 8558 }, { "epoch": 2.34, "grad_norm": 1.3621744843099224, "learning_rate": 1.2284992720766686e-06, "loss": 0.0401, "step": 8559 }, { "epoch": 2.34, "grad_norm": 1.2418250408879725, "learning_rate": 1.2275319206658215e-06, "loss": 0.0387, "step": 8560 }, { "epoch": 2.34, "grad_norm": 1.6521369990603894, "learning_rate": 1.2265648969670096e-06, "loss": 0.0508, "step": 8561 }, { "epoch": 2.34, "grad_norm": 1.596360838352744, "learning_rate": 1.2255982010642387e-06, "loss": 0.0518, "step": 8562 }, { "epoch": 2.34, "grad_norm": 1.9350441098558337, "learning_rate": 1.2246318330414824e-06, "loss": 0.0468, "step": 8563 }, { "epoch": 2.34, "grad_norm": 1.4171459940900415, "learning_rate": 1.2236657929826917e-06, "loss": 0.0411, "step": 8564 }, { "epoch": 2.34, "grad_norm": 1.60396847608048, "learning_rate": 1.2227000809717838e-06, "loss": 0.05, "step": 8565 }, { "epoch": 2.34, "grad_norm": 1.3460343642192503, "learning_rate": 1.221734697092652e-06, "loss": 0.0376, "step": 8566 }, { "epoch": 2.34, "grad_norm": 1.3750708227058803, "learning_rate": 1.2207696414291563e-06, "loss": 0.0408, "step": 8567 }, { "epoch": 2.34, "grad_norm": 1.6324929934378343, "learning_rate": 1.219804914065132e-06, "loss": 0.0638, "step": 8568 }, { "epoch": 2.34, "grad_norm": 1.353178696400372, "learning_rate": 1.2188405150843812e-06, "loss": 0.0405, "step": 8569 }, { "epoch": 2.34, "grad_norm": 1.2432653278241774, "learning_rate": 1.2178764445706854e-06, "loss": 0.0329, "step": 8570 }, { "epoch": 2.34, "grad_norm": 1.6605459794269863, "learning_rate": 1.2169127026077888e-06, "loss": 0.0497, "step": 8571 }, { "epoch": 2.34, "grad_norm": 1.338599510254725, "learning_rate": 1.2159492892794144e-06, "loss": 0.0398, "step": 8572 }, { "epoch": 2.34, "grad_norm": 1.3337499688084775, "learning_rate": 1.2149862046692513e-06, "loss": 0.041, "step": 8573 }, { "epoch": 2.34, "grad_norm": 1.6871546847299972, "learning_rate": 1.2140234488609631e-06, "loss": 0.0595, "step": 8574 }, { "epoch": 2.34, "grad_norm": 1.3621560394321242, "learning_rate": 1.2130610219381811e-06, "loss": 0.0461, "step": 8575 }, { "epoch": 2.34, "grad_norm": 1.431846331056706, "learning_rate": 1.2120989239845149e-06, "loss": 0.0458, "step": 8576 }, { "epoch": 2.34, "grad_norm": 1.330883434438782, "learning_rate": 1.2111371550835377e-06, "loss": 0.0379, "step": 8577 }, { "epoch": 2.34, "grad_norm": 1.4324720737034748, "learning_rate": 1.210175715318801e-06, "loss": 0.0473, "step": 8578 }, { "epoch": 2.34, "grad_norm": 1.386022627075222, "learning_rate": 1.2092146047738229e-06, "loss": 0.0459, "step": 8579 }, { "epoch": 2.34, "grad_norm": 1.5147285559876726, "learning_rate": 1.2082538235320928e-06, "loss": 0.051, "step": 8580 }, { "epoch": 2.34, "grad_norm": 1.340001570993958, "learning_rate": 1.207293371677077e-06, "loss": 0.0478, "step": 8581 }, { "epoch": 2.34, "grad_norm": 1.3771317047921547, "learning_rate": 1.2063332492922052e-06, "loss": 0.0426, "step": 8582 }, { "epoch": 2.34, "grad_norm": 1.515285126580002, "learning_rate": 1.2053734564608865e-06, "loss": 0.0462, "step": 8583 }, { "epoch": 2.34, "grad_norm": 1.6173835816289572, "learning_rate": 1.2044139932664955e-06, "loss": 0.0519, "step": 8584 }, { "epoch": 2.34, "grad_norm": 1.5601534038519072, "learning_rate": 1.2034548597923812e-06, "loss": 0.0516, "step": 8585 }, { "epoch": 2.34, "grad_norm": 1.2423781192261574, "learning_rate": 1.20249605612186e-06, "loss": 0.0426, "step": 8586 }, { "epoch": 2.34, "grad_norm": 1.396407086050511, "learning_rate": 1.2015375823382264e-06, "loss": 0.0429, "step": 8587 }, { "epoch": 2.34, "grad_norm": 1.5993913145005045, "learning_rate": 1.2005794385247398e-06, "loss": 0.0503, "step": 8588 }, { "epoch": 2.34, "grad_norm": 1.5227206796595838, "learning_rate": 1.199621624764636e-06, "loss": 0.0515, "step": 8589 }, { "epoch": 2.35, "grad_norm": 1.6243251975685995, "learning_rate": 1.1986641411411181e-06, "loss": 0.0524, "step": 8590 }, { "epoch": 2.35, "grad_norm": 1.3810038748179703, "learning_rate": 1.1977069877373625e-06, "loss": 0.042, "step": 8591 }, { "epoch": 2.35, "grad_norm": 1.4217777920582164, "learning_rate": 1.1967501646365147e-06, "loss": 0.0502, "step": 8592 }, { "epoch": 2.35, "grad_norm": 1.6167066473991174, "learning_rate": 1.1957936719216966e-06, "loss": 0.0512, "step": 8593 }, { "epoch": 2.35, "grad_norm": 1.5120967403357835, "learning_rate": 1.1948375096759956e-06, "loss": 0.0517, "step": 8594 }, { "epoch": 2.35, "grad_norm": 1.5946784231182125, "learning_rate": 1.1938816779824753e-06, "loss": 0.0473, "step": 8595 }, { "epoch": 2.35, "grad_norm": 1.7880335970379435, "learning_rate": 1.1929261769241662e-06, "loss": 0.0469, "step": 8596 }, { "epoch": 2.35, "grad_norm": 1.7452235234168656, "learning_rate": 1.1919710065840733e-06, "loss": 0.0496, "step": 8597 }, { "epoch": 2.35, "grad_norm": 1.4659878584255868, "learning_rate": 1.1910161670451697e-06, "loss": 0.0465, "step": 8598 }, { "epoch": 2.35, "grad_norm": 1.652531048565458, "learning_rate": 1.1900616583904046e-06, "loss": 0.0459, "step": 8599 }, { "epoch": 2.35, "grad_norm": 1.5988455220923825, "learning_rate": 1.1891074807026926e-06, "loss": 0.051, "step": 8600 }, { "epoch": 2.35, "grad_norm": 1.3012670161704276, "learning_rate": 1.1881536340649258e-06, "loss": 0.0369, "step": 8601 }, { "epoch": 2.35, "grad_norm": 1.4469568776144042, "learning_rate": 1.1872001185599625e-06, "loss": 0.0459, "step": 8602 }, { "epoch": 2.35, "grad_norm": 1.316046676489979, "learning_rate": 1.186246934270634e-06, "loss": 0.0349, "step": 8603 }, { "epoch": 2.35, "grad_norm": 1.6045879043557556, "learning_rate": 1.185294081279742e-06, "loss": 0.0543, "step": 8604 }, { "epoch": 2.35, "grad_norm": 1.610926294484075, "learning_rate": 1.1843415596700618e-06, "loss": 0.05, "step": 8605 }, { "epoch": 2.35, "grad_norm": 1.4180162785189552, "learning_rate": 1.183389369524337e-06, "loss": 0.0453, "step": 8606 }, { "epoch": 2.35, "grad_norm": 1.4907686049086637, "learning_rate": 1.182437510925286e-06, "loss": 0.0456, "step": 8607 }, { "epoch": 2.35, "grad_norm": 1.5878190486874169, "learning_rate": 1.1814859839555947e-06, "loss": 0.0544, "step": 8608 }, { "epoch": 2.35, "grad_norm": 1.3623270645943937, "learning_rate": 1.1805347886979219e-06, "loss": 0.0442, "step": 8609 }, { "epoch": 2.35, "grad_norm": 1.599371467693969, "learning_rate": 1.1795839252348957e-06, "loss": 0.0453, "step": 8610 }, { "epoch": 2.35, "grad_norm": 1.4999803437332517, "learning_rate": 1.17863339364912e-06, "loss": 0.0509, "step": 8611 }, { "epoch": 2.35, "grad_norm": 1.59768194251578, "learning_rate": 1.1776831940231642e-06, "loss": 0.0597, "step": 8612 }, { "epoch": 2.35, "grad_norm": 1.6869592713976012, "learning_rate": 1.1767333264395735e-06, "loss": 0.0438, "step": 8613 }, { "epoch": 2.35, "grad_norm": 1.449916767455108, "learning_rate": 1.1757837909808628e-06, "loss": 0.043, "step": 8614 }, { "epoch": 2.35, "grad_norm": 1.576697244086845, "learning_rate": 1.1748345877295158e-06, "loss": 0.0621, "step": 8615 }, { "epoch": 2.35, "grad_norm": 1.6121585874276614, "learning_rate": 1.1738857167679884e-06, "loss": 0.0486, "step": 8616 }, { "epoch": 2.35, "grad_norm": 1.610681863397244, "learning_rate": 1.1729371781787119e-06, "loss": 0.0442, "step": 8617 }, { "epoch": 2.35, "grad_norm": 1.589676945449953, "learning_rate": 1.171988972044082e-06, "loss": 0.0526, "step": 8618 }, { "epoch": 2.35, "grad_norm": 1.4328779255876392, "learning_rate": 1.1710410984464716e-06, "loss": 0.0466, "step": 8619 }, { "epoch": 2.35, "grad_norm": 1.5986272448433725, "learning_rate": 1.1700935574682204e-06, "loss": 0.0476, "step": 8620 }, { "epoch": 2.35, "grad_norm": 1.3945821234217615, "learning_rate": 1.1691463491916404e-06, "loss": 0.0424, "step": 8621 }, { "epoch": 2.35, "grad_norm": 1.3581316199233786, "learning_rate": 1.1681994736990143e-06, "loss": 0.0421, "step": 8622 }, { "epoch": 2.35, "grad_norm": 1.616051125250449, "learning_rate": 1.1672529310725995e-06, "loss": 0.0449, "step": 8623 }, { "epoch": 2.35, "grad_norm": 1.513672825527011, "learning_rate": 1.1663067213946177e-06, "loss": 0.0478, "step": 8624 }, { "epoch": 2.35, "grad_norm": 1.581738660379025, "learning_rate": 1.1653608447472698e-06, "loss": 0.0452, "step": 8625 }, { "epoch": 2.35, "grad_norm": 1.6175716375464169, "learning_rate": 1.1644153012127208e-06, "loss": 0.0499, "step": 8626 }, { "epoch": 2.36, "grad_norm": 1.6567081946869795, "learning_rate": 1.1634700908731106e-06, "loss": 0.0563, "step": 8627 }, { "epoch": 2.36, "grad_norm": 1.5909292535527835, "learning_rate": 1.162525213810547e-06, "loss": 0.042, "step": 8628 }, { "epoch": 2.36, "grad_norm": 1.3541146481365631, "learning_rate": 1.1615806701071137e-06, "loss": 0.0427, "step": 8629 }, { "epoch": 2.36, "grad_norm": 1.372837942420161, "learning_rate": 1.1606364598448605e-06, "loss": 0.0444, "step": 8630 }, { "epoch": 2.36, "grad_norm": 1.5627315361978833, "learning_rate": 1.159692583105812e-06, "loss": 0.053, "step": 8631 }, { "epoch": 2.36, "grad_norm": 1.5419208587621287, "learning_rate": 1.158749039971962e-06, "loss": 0.049, "step": 8632 }, { "epoch": 2.36, "grad_norm": 1.2840754490400847, "learning_rate": 1.157805830525275e-06, "loss": 0.0346, "step": 8633 }, { "epoch": 2.36, "grad_norm": 1.4843195866467578, "learning_rate": 1.1568629548476856e-06, "loss": 0.0396, "step": 8634 }, { "epoch": 2.36, "grad_norm": 1.5814382541359278, "learning_rate": 1.1559204130211039e-06, "loss": 0.0528, "step": 8635 }, { "epoch": 2.36, "grad_norm": 1.7951448828343117, "learning_rate": 1.1549782051274045e-06, "loss": 0.0465, "step": 8636 }, { "epoch": 2.36, "grad_norm": 1.6467678756415827, "learning_rate": 1.15403633124844e-06, "loss": 0.0452, "step": 8637 }, { "epoch": 2.36, "grad_norm": 1.2946089299435162, "learning_rate": 1.1530947914660285e-06, "loss": 0.0456, "step": 8638 }, { "epoch": 2.36, "grad_norm": 1.84400729011203, "learning_rate": 1.1521535858619615e-06, "loss": 0.0394, "step": 8639 }, { "epoch": 2.36, "grad_norm": 1.724159872037726, "learning_rate": 1.151212714517999e-06, "loss": 0.0523, "step": 8640 }, { "epoch": 2.36, "grad_norm": 1.6081840643448688, "learning_rate": 1.1502721775158772e-06, "loss": 0.0494, "step": 8641 }, { "epoch": 2.36, "grad_norm": 1.282742011640965, "learning_rate": 1.1493319749372967e-06, "loss": 0.0413, "step": 8642 }, { "epoch": 2.36, "grad_norm": 1.4778855809345055, "learning_rate": 1.1483921068639353e-06, "loss": 0.0445, "step": 8643 }, { "epoch": 2.36, "grad_norm": 1.3695715773883415, "learning_rate": 1.1474525733774377e-06, "loss": 0.0463, "step": 8644 }, { "epoch": 2.36, "grad_norm": 1.576472202366201, "learning_rate": 1.1465133745594203e-06, "loss": 0.0561, "step": 8645 }, { "epoch": 2.36, "grad_norm": 1.5425734133063451, "learning_rate": 1.14557451049147e-06, "loss": 0.0472, "step": 8646 }, { "epoch": 2.36, "grad_norm": 1.5060810439061327, "learning_rate": 1.1446359812551473e-06, "loss": 0.0499, "step": 8647 }, { "epoch": 2.36, "grad_norm": 1.6021134994409165, "learning_rate": 1.1436977869319787e-06, "loss": 0.0472, "step": 8648 }, { "epoch": 2.36, "grad_norm": 1.4865952915573266, "learning_rate": 1.1427599276034685e-06, "loss": 0.0431, "step": 8649 }, { "epoch": 2.36, "grad_norm": 1.3829558053150885, "learning_rate": 1.1418224033510855e-06, "loss": 0.0402, "step": 8650 }, { "epoch": 2.36, "grad_norm": 1.535414289528933, "learning_rate": 1.140885214256272e-06, "loss": 0.0577, "step": 8651 }, { "epoch": 2.36, "grad_norm": 1.7243132175440012, "learning_rate": 1.1399483604004403e-06, "loss": 0.0543, "step": 8652 }, { "epoch": 2.36, "grad_norm": 1.5194590495364049, "learning_rate": 1.139011841864977e-06, "loss": 0.0491, "step": 8653 }, { "epoch": 2.36, "grad_norm": 1.717303260322061, "learning_rate": 1.1380756587312335e-06, "loss": 0.0567, "step": 8654 }, { "epoch": 2.36, "grad_norm": 1.4285278869234561, "learning_rate": 1.1371398110805386e-06, "loss": 0.0443, "step": 8655 }, { "epoch": 2.36, "grad_norm": 1.5087968700629493, "learning_rate": 1.136204298994188e-06, "loss": 0.0465, "step": 8656 }, { "epoch": 2.36, "grad_norm": 1.3287636887802292, "learning_rate": 1.135269122553448e-06, "loss": 0.0374, "step": 8657 }, { "epoch": 2.36, "grad_norm": 1.5752362278404695, "learning_rate": 1.1343342818395558e-06, "loss": 0.0466, "step": 8658 }, { "epoch": 2.36, "grad_norm": 1.486605507161374, "learning_rate": 1.133399776933724e-06, "loss": 0.0518, "step": 8659 }, { "epoch": 2.36, "grad_norm": 1.7163191113180185, "learning_rate": 1.1324656079171288e-06, "loss": 0.0513, "step": 8660 }, { "epoch": 2.36, "grad_norm": 1.493976505178587, "learning_rate": 1.1315317748709237e-06, "loss": 0.0457, "step": 8661 }, { "epoch": 2.36, "grad_norm": 1.2073708834407713, "learning_rate": 1.1305982778762291e-06, "loss": 0.0371, "step": 8662 }, { "epoch": 2.37, "grad_norm": 1.3983621879023187, "learning_rate": 1.1296651170141376e-06, "loss": 0.0424, "step": 8663 }, { "epoch": 2.37, "grad_norm": 1.4080438562072919, "learning_rate": 1.1287322923657106e-06, "loss": 0.0457, "step": 8664 }, { "epoch": 2.37, "grad_norm": 1.5453861729578757, "learning_rate": 1.1277998040119853e-06, "loss": 0.0388, "step": 8665 }, { "epoch": 2.37, "grad_norm": 1.4268554747247901, "learning_rate": 1.1268676520339628e-06, "loss": 0.0431, "step": 8666 }, { "epoch": 2.37, "grad_norm": 1.5741436458767701, "learning_rate": 1.1259358365126217e-06, "loss": 0.047, "step": 8667 }, { "epoch": 2.37, "grad_norm": 1.2473785471110066, "learning_rate": 1.1250043575289065e-06, "loss": 0.0355, "step": 8668 }, { "epoch": 2.37, "grad_norm": 1.6203638563143967, "learning_rate": 1.1240732151637352e-06, "loss": 0.0471, "step": 8669 }, { "epoch": 2.37, "grad_norm": 1.627235085690202, "learning_rate": 1.1231424094979932e-06, "loss": 0.0512, "step": 8670 }, { "epoch": 2.37, "grad_norm": 1.4865486633142886, "learning_rate": 1.1222119406125426e-06, "loss": 0.0497, "step": 8671 }, { "epoch": 2.37, "grad_norm": 1.9003270357968307, "learning_rate": 1.1212818085882094e-06, "loss": 0.0495, "step": 8672 }, { "epoch": 2.37, "grad_norm": 1.6082930753624527, "learning_rate": 1.120352013505796e-06, "loss": 0.0532, "step": 8673 }, { "epoch": 2.37, "grad_norm": 1.5603501555908568, "learning_rate": 1.1194225554460725e-06, "loss": 0.0441, "step": 8674 }, { "epoch": 2.37, "grad_norm": 1.6501065297966977, "learning_rate": 1.118493434489779e-06, "loss": 0.0511, "step": 8675 }, { "epoch": 2.37, "grad_norm": 1.5138720737797267, "learning_rate": 1.1175646507176302e-06, "loss": 0.0559, "step": 8676 }, { "epoch": 2.37, "grad_norm": 1.5372216024952963, "learning_rate": 1.1166362042103056e-06, "loss": 0.0524, "step": 8677 }, { "epoch": 2.37, "grad_norm": 1.3272649858523338, "learning_rate": 1.1157080950484628e-06, "loss": 0.0441, "step": 8678 }, { "epoch": 2.37, "grad_norm": 1.4882585394961207, "learning_rate": 1.1147803233127241e-06, "loss": 0.0474, "step": 8679 }, { "epoch": 2.37, "grad_norm": 1.4134773122234536, "learning_rate": 1.1138528890836842e-06, "loss": 0.0459, "step": 8680 }, { "epoch": 2.37, "grad_norm": 1.4280170020970242, "learning_rate": 1.1129257924419074e-06, "loss": 0.0513, "step": 8681 }, { "epoch": 2.37, "grad_norm": 1.3179120664920447, "learning_rate": 1.111999033467933e-06, "loss": 0.047, "step": 8682 }, { "epoch": 2.37, "grad_norm": 1.5697454101463022, "learning_rate": 1.1110726122422654e-06, "loss": 0.0484, "step": 8683 }, { "epoch": 2.37, "grad_norm": 1.3311403900360779, "learning_rate": 1.110146528845385e-06, "loss": 0.0335, "step": 8684 }, { "epoch": 2.37, "grad_norm": 1.4230840686567787, "learning_rate": 1.1092207833577384e-06, "loss": 0.0349, "step": 8685 }, { "epoch": 2.37, "grad_norm": 1.3892815891291426, "learning_rate": 1.1082953758597447e-06, "loss": 0.0392, "step": 8686 }, { "epoch": 2.37, "grad_norm": 1.307990506217155, "learning_rate": 1.107370306431792e-06, "loss": 0.04, "step": 8687 }, { "epoch": 2.37, "grad_norm": 1.5814182920239135, "learning_rate": 1.1064455751542436e-06, "loss": 0.0438, "step": 8688 }, { "epoch": 2.37, "grad_norm": 1.343865160788658, "learning_rate": 1.1055211821074275e-06, "loss": 0.0349, "step": 8689 }, { "epoch": 2.37, "grad_norm": 1.2723597488772327, "learning_rate": 1.1045971273716476e-06, "loss": 0.0412, "step": 8690 }, { "epoch": 2.37, "grad_norm": 1.4912736167483633, "learning_rate": 1.1036734110271753e-06, "loss": 0.0484, "step": 8691 }, { "epoch": 2.37, "grad_norm": 1.7384963574201484, "learning_rate": 1.1027500331542523e-06, "loss": 0.0504, "step": 8692 }, { "epoch": 2.37, "grad_norm": 1.5853992787740907, "learning_rate": 1.1018269938330912e-06, "loss": 0.0522, "step": 8693 }, { "epoch": 2.37, "grad_norm": 1.4642178130049124, "learning_rate": 1.1009042931438784e-06, "loss": 0.0471, "step": 8694 }, { "epoch": 2.37, "grad_norm": 1.4127145072286977, "learning_rate": 1.0999819311667658e-06, "loss": 0.0495, "step": 8695 }, { "epoch": 2.37, "grad_norm": 1.5683912330819498, "learning_rate": 1.099059907981881e-06, "loss": 0.0473, "step": 8696 }, { "epoch": 2.37, "grad_norm": 1.4596551246417173, "learning_rate": 1.0981382236693184e-06, "loss": 0.0461, "step": 8697 }, { "epoch": 2.37, "grad_norm": 1.635664357936403, "learning_rate": 1.0972168783091436e-06, "loss": 0.0566, "step": 8698 }, { "epoch": 2.37, "grad_norm": 1.3264279600670759, "learning_rate": 1.0962958719813926e-06, "loss": 0.0471, "step": 8699 }, { "epoch": 2.38, "grad_norm": 1.257315618850486, "learning_rate": 1.0953752047660754e-06, "loss": 0.0393, "step": 8700 }, { "epoch": 2.38, "grad_norm": 1.341497022512968, "learning_rate": 1.0944548767431667e-06, "loss": 0.039, "step": 8701 }, { "epoch": 2.38, "grad_norm": 1.4645846401110711, "learning_rate": 1.0935348879926178e-06, "loss": 0.0419, "step": 8702 }, { "epoch": 2.38, "grad_norm": 1.4038228855487405, "learning_rate": 1.0926152385943456e-06, "loss": 0.0367, "step": 8703 }, { "epoch": 2.38, "grad_norm": 1.8403982762513762, "learning_rate": 1.0916959286282409e-06, "loss": 0.0513, "step": 8704 }, { "epoch": 2.38, "grad_norm": 1.4067659706810443, "learning_rate": 1.0907769581741606e-06, "loss": 0.0458, "step": 8705 }, { "epoch": 2.38, "grad_norm": 1.354428472313196, "learning_rate": 1.089858327311939e-06, "loss": 0.0408, "step": 8706 }, { "epoch": 2.38, "grad_norm": 1.5481662604829547, "learning_rate": 1.0889400361213737e-06, "loss": 0.0607, "step": 8707 }, { "epoch": 2.38, "grad_norm": 1.744281711896534, "learning_rate": 1.0880220846822392e-06, "loss": 0.0537, "step": 8708 }, { "epoch": 2.38, "grad_norm": 1.4002141535611814, "learning_rate": 1.0871044730742752e-06, "loss": 0.0502, "step": 8709 }, { "epoch": 2.38, "grad_norm": 1.5578376821595767, "learning_rate": 1.0861872013771958e-06, "loss": 0.0511, "step": 8710 }, { "epoch": 2.38, "grad_norm": 1.7716819235603667, "learning_rate": 1.0852702696706807e-06, "loss": 0.0537, "step": 8711 }, { "epoch": 2.38, "grad_norm": 1.6727194201491118, "learning_rate": 1.0843536780343866e-06, "loss": 0.0456, "step": 8712 }, { "epoch": 2.38, "grad_norm": 1.49711488513233, "learning_rate": 1.0834374265479347e-06, "loss": 0.0458, "step": 8713 }, { "epoch": 2.38, "grad_norm": 1.6697889951619662, "learning_rate": 1.082521515290922e-06, "loss": 0.0463, "step": 8714 }, { "epoch": 2.38, "grad_norm": 1.1893122752854486, "learning_rate": 1.081605944342911e-06, "loss": 0.032, "step": 8715 }, { "epoch": 2.38, "grad_norm": 1.3788904308151453, "learning_rate": 1.0806907137834377e-06, "loss": 0.0423, "step": 8716 }, { "epoch": 2.38, "grad_norm": 1.6836252583564824, "learning_rate": 1.0797758236920063e-06, "loss": 0.0557, "step": 8717 }, { "epoch": 2.38, "grad_norm": 1.7030975644250759, "learning_rate": 1.0788612741480947e-06, "loss": 0.0438, "step": 8718 }, { "epoch": 2.38, "grad_norm": 1.7254683561152628, "learning_rate": 1.0779470652311475e-06, "loss": 0.0475, "step": 8719 }, { "epoch": 2.38, "grad_norm": 1.3649290956955344, "learning_rate": 1.0770331970205834e-06, "loss": 0.043, "step": 8720 }, { "epoch": 2.38, "grad_norm": 1.718566869492126, "learning_rate": 1.0761196695957882e-06, "loss": 0.0514, "step": 8721 }, { "epoch": 2.38, "grad_norm": 1.4067620604936915, "learning_rate": 1.0752064830361202e-06, "loss": 0.0402, "step": 8722 }, { "epoch": 2.38, "grad_norm": 1.5341310704741666, "learning_rate": 1.0742936374209056e-06, "loss": 0.046, "step": 8723 }, { "epoch": 2.38, "grad_norm": 1.3092533755309306, "learning_rate": 1.0733811328294453e-06, "loss": 0.0414, "step": 8724 }, { "epoch": 2.38, "grad_norm": 1.3943252796019423, "learning_rate": 1.0724689693410052e-06, "loss": 0.0463, "step": 8725 }, { "epoch": 2.38, "grad_norm": 1.3947905734558317, "learning_rate": 1.071557147034828e-06, "loss": 0.0546, "step": 8726 }, { "epoch": 2.38, "grad_norm": 1.7754939271621943, "learning_rate": 1.0706456659901204e-06, "loss": 0.0586, "step": 8727 }, { "epoch": 2.38, "grad_norm": 1.537108118182416, "learning_rate": 1.0697345262860638e-06, "loss": 0.0379, "step": 8728 }, { "epoch": 2.38, "grad_norm": 1.6417435736150496, "learning_rate": 1.068823728001806e-06, "loss": 0.0504, "step": 8729 }, { "epoch": 2.38, "grad_norm": 1.4142194029666615, "learning_rate": 1.0679132712164702e-06, "loss": 0.0426, "step": 8730 }, { "epoch": 2.38, "grad_norm": 1.448040446353482, "learning_rate": 1.067003156009145e-06, "loss": 0.0515, "step": 8731 }, { "epoch": 2.38, "grad_norm": 1.4846537490367535, "learning_rate": 1.0660933824588932e-06, "loss": 0.0485, "step": 8732 }, { "epoch": 2.38, "grad_norm": 1.5657631118437902, "learning_rate": 1.0651839506447464e-06, "loss": 0.0498, "step": 8733 }, { "epoch": 2.38, "grad_norm": 1.4579179848584627, "learning_rate": 1.064274860645706e-06, "loss": 0.0483, "step": 8734 }, { "epoch": 2.38, "grad_norm": 1.589541583739505, "learning_rate": 1.0633661125407418e-06, "loss": 0.0506, "step": 8735 }, { "epoch": 2.38, "grad_norm": 1.506847849878915, "learning_rate": 1.0624577064087998e-06, "loss": 0.0417, "step": 8736 }, { "epoch": 2.39, "grad_norm": 1.3345558905313566, "learning_rate": 1.0615496423287896e-06, "loss": 0.0456, "step": 8737 }, { "epoch": 2.39, "grad_norm": 1.5341032669021522, "learning_rate": 1.0606419203795975e-06, "loss": 0.0484, "step": 8738 }, { "epoch": 2.39, "grad_norm": 1.7359820339919008, "learning_rate": 1.059734540640075e-06, "loss": 0.0505, "step": 8739 }, { "epoch": 2.39, "grad_norm": 1.2955877374309666, "learning_rate": 1.0588275031890455e-06, "loss": 0.0378, "step": 8740 }, { "epoch": 2.39, "grad_norm": 1.4003891859511555, "learning_rate": 1.057920808105301e-06, "loss": 0.0448, "step": 8741 }, { "epoch": 2.39, "grad_norm": 1.3820908158222436, "learning_rate": 1.0570144554676092e-06, "loss": 0.0418, "step": 8742 }, { "epoch": 2.39, "grad_norm": 1.4152866756912188, "learning_rate": 1.0561084453547016e-06, "loss": 0.0462, "step": 8743 }, { "epoch": 2.39, "grad_norm": 1.4233780016520368, "learning_rate": 1.055202777845285e-06, "loss": 0.0403, "step": 8744 }, { "epoch": 2.39, "grad_norm": 1.5129650197065576, "learning_rate": 1.0542974530180327e-06, "loss": 0.05, "step": 8745 }, { "epoch": 2.39, "grad_norm": 1.3690041538815456, "learning_rate": 1.0533924709515902e-06, "loss": 0.0455, "step": 8746 }, { "epoch": 2.39, "grad_norm": 1.4596056520261682, "learning_rate": 1.0524878317245713e-06, "loss": 0.0504, "step": 8747 }, { "epoch": 2.39, "grad_norm": 1.4479850841049984, "learning_rate": 1.051583535415564e-06, "loss": 0.0365, "step": 8748 }, { "epoch": 2.39, "grad_norm": 1.373508659862148, "learning_rate": 1.0506795821031212e-06, "loss": 0.0464, "step": 8749 }, { "epoch": 2.39, "grad_norm": 1.6301214825816965, "learning_rate": 1.049775971865772e-06, "loss": 0.0498, "step": 8750 }, { "epoch": 2.39, "grad_norm": 1.3193886075016072, "learning_rate": 1.0488727047820108e-06, "loss": 0.033, "step": 8751 }, { "epoch": 2.39, "grad_norm": 1.613290170834081, "learning_rate": 1.0479697809303035e-06, "loss": 0.0516, "step": 8752 }, { "epoch": 2.39, "grad_norm": 1.5584302344052667, "learning_rate": 1.0470672003890858e-06, "loss": 0.0433, "step": 8753 }, { "epoch": 2.39, "grad_norm": 1.4777825509915168, "learning_rate": 1.046164963236767e-06, "loss": 0.0501, "step": 8754 }, { "epoch": 2.39, "grad_norm": 1.5072193656964288, "learning_rate": 1.0452630695517208e-06, "loss": 0.0427, "step": 8755 }, { "epoch": 2.39, "grad_norm": 1.4850964624208396, "learning_rate": 1.0443615194122969e-06, "loss": 0.0463, "step": 8756 }, { "epoch": 2.39, "grad_norm": 1.4646351833393905, "learning_rate": 1.0434603128968112e-06, "loss": 0.0469, "step": 8757 }, { "epoch": 2.39, "grad_norm": 1.2784578683891326, "learning_rate": 1.0425594500835512e-06, "loss": 0.0404, "step": 8758 }, { "epoch": 2.39, "grad_norm": 1.6368838695048964, "learning_rate": 1.0416589310507723e-06, "loss": 0.0489, "step": 8759 }, { "epoch": 2.39, "grad_norm": 1.509262635574599, "learning_rate": 1.0407587558767056e-06, "loss": 0.0494, "step": 8760 }, { "epoch": 2.39, "grad_norm": 1.6624956555128414, "learning_rate": 1.0398589246395457e-06, "loss": 0.0536, "step": 8761 }, { "epoch": 2.39, "grad_norm": 1.4471894983615914, "learning_rate": 1.0389594374174628e-06, "loss": 0.0434, "step": 8762 }, { "epoch": 2.39, "grad_norm": 1.5689389000444693, "learning_rate": 1.0380602942885937e-06, "loss": 0.0486, "step": 8763 }, { "epoch": 2.39, "grad_norm": 1.59127909641896, "learning_rate": 1.0371614953310465e-06, "loss": 0.053, "step": 8764 }, { "epoch": 2.39, "grad_norm": 1.5117878643022244, "learning_rate": 1.0362630406228986e-06, "loss": 0.0434, "step": 8765 }, { "epoch": 2.39, "grad_norm": 1.430700174163597, "learning_rate": 1.0353649302421982e-06, "loss": 0.0474, "step": 8766 }, { "epoch": 2.39, "grad_norm": 1.8450598099289466, "learning_rate": 1.0344671642669656e-06, "loss": 0.0497, "step": 8767 }, { "epoch": 2.39, "grad_norm": 1.2708096092054417, "learning_rate": 1.033569742775188e-06, "loss": 0.0379, "step": 8768 }, { "epoch": 2.39, "grad_norm": 1.632131534412719, "learning_rate": 1.0326726658448238e-06, "loss": 0.0532, "step": 8769 }, { "epoch": 2.39, "grad_norm": 1.245203290786845, "learning_rate": 1.0317759335538002e-06, "loss": 0.0378, "step": 8770 }, { "epoch": 2.39, "grad_norm": 1.3530264251542963, "learning_rate": 1.0308795459800186e-06, "loss": 0.0441, "step": 8771 }, { "epoch": 2.39, "grad_norm": 1.4332085647035122, "learning_rate": 1.029983503201345e-06, "loss": 0.0444, "step": 8772 }, { "epoch": 2.4, "grad_norm": 1.400410541932451, "learning_rate": 1.02908780529562e-06, "loss": 0.0425, "step": 8773 }, { "epoch": 2.4, "grad_norm": 1.6666841004742612, "learning_rate": 1.0281924523406518e-06, "loss": 0.0504, "step": 8774 }, { "epoch": 2.4, "grad_norm": 1.4666928689361818, "learning_rate": 1.0272974444142192e-06, "loss": 0.0481, "step": 8775 }, { "epoch": 2.4, "grad_norm": 1.28456884729643, "learning_rate": 1.0264027815940692e-06, "loss": 0.0376, "step": 8776 }, { "epoch": 2.4, "grad_norm": 1.8066625105372123, "learning_rate": 1.0255084639579232e-06, "loss": 0.0537, "step": 8777 }, { "epoch": 2.4, "grad_norm": 1.5567682959897602, "learning_rate": 1.0246144915834683e-06, "loss": 0.0512, "step": 8778 }, { "epoch": 2.4, "grad_norm": 1.7222915824930451, "learning_rate": 1.0237208645483648e-06, "loss": 0.0506, "step": 8779 }, { "epoch": 2.4, "grad_norm": 1.607556983171655, "learning_rate": 1.0228275829302415e-06, "loss": 0.0509, "step": 8780 }, { "epoch": 2.4, "grad_norm": 1.8842353193484842, "learning_rate": 1.021934646806696e-06, "loss": 0.059, "step": 8781 }, { "epoch": 2.4, "grad_norm": 1.5680425666035738, "learning_rate": 1.0210420562552963e-06, "loss": 0.0432, "step": 8782 }, { "epoch": 2.4, "grad_norm": 1.6600658814451268, "learning_rate": 1.020149811353584e-06, "loss": 0.0407, "step": 8783 }, { "epoch": 2.4, "grad_norm": 1.5964090690810402, "learning_rate": 1.0192579121790652e-06, "loss": 0.0457, "step": 8784 }, { "epoch": 2.4, "grad_norm": 1.4714120993328248, "learning_rate": 1.0183663588092214e-06, "loss": 0.0503, "step": 8785 }, { "epoch": 2.4, "grad_norm": 1.5384554420353587, "learning_rate": 1.0174751513214992e-06, "loss": 0.0483, "step": 8786 }, { "epoch": 2.4, "grad_norm": 1.4258340331164856, "learning_rate": 1.0165842897933188e-06, "loss": 0.0462, "step": 8787 }, { "epoch": 2.4, "grad_norm": 2.0253356486129674, "learning_rate": 1.0156937743020657e-06, "loss": 0.0515, "step": 8788 }, { "epoch": 2.4, "grad_norm": 1.3714356621762054, "learning_rate": 1.014803604925102e-06, "loss": 0.0444, "step": 8789 }, { "epoch": 2.4, "grad_norm": 1.542973732715464, "learning_rate": 1.0139137817397537e-06, "loss": 0.0418, "step": 8790 }, { "epoch": 2.4, "grad_norm": 1.5581835248773026, "learning_rate": 1.013024304823322e-06, "loss": 0.0527, "step": 8791 }, { "epoch": 2.4, "grad_norm": 1.7057100004497252, "learning_rate": 1.0121351742530728e-06, "loss": 0.0464, "step": 8792 }, { "epoch": 2.4, "grad_norm": 1.5767212206346282, "learning_rate": 1.0112463901062453e-06, "loss": 0.0478, "step": 8793 }, { "epoch": 2.4, "grad_norm": 1.5295288425758895, "learning_rate": 1.010357952460046e-06, "loss": 0.051, "step": 8794 }, { "epoch": 2.4, "grad_norm": 1.613798376132376, "learning_rate": 1.0094698613916558e-06, "loss": 0.0457, "step": 8795 }, { "epoch": 2.4, "grad_norm": 1.6117486954109663, "learning_rate": 1.00858211697822e-06, "loss": 0.0461, "step": 8796 }, { "epoch": 2.4, "grad_norm": 1.5788641001584318, "learning_rate": 1.007694719296859e-06, "loss": 0.0496, "step": 8797 }, { "epoch": 2.4, "grad_norm": 1.6846278568899848, "learning_rate": 1.0068076684246586e-06, "loss": 0.0584, "step": 8798 }, { "epoch": 2.4, "grad_norm": 1.5396276638518092, "learning_rate": 1.0059209644386775e-06, "loss": 0.044, "step": 8799 }, { "epoch": 2.4, "grad_norm": 1.6523128589519536, "learning_rate": 1.0050346074159406e-06, "loss": 0.0472, "step": 8800 }, { "epoch": 2.4, "grad_norm": 1.5279275546385982, "learning_rate": 1.0041485974334493e-06, "loss": 0.0511, "step": 8801 }, { "epoch": 2.4, "grad_norm": 1.4378350064755228, "learning_rate": 1.0032629345681666e-06, "loss": 0.0539, "step": 8802 }, { "epoch": 2.4, "grad_norm": 1.603667699107373, "learning_rate": 1.0023776188970325e-06, "loss": 0.0544, "step": 8803 }, { "epoch": 2.4, "grad_norm": 1.5081346375230482, "learning_rate": 1.0014926504969535e-06, "loss": 0.0515, "step": 8804 }, { "epoch": 2.4, "grad_norm": 1.214810196934005, "learning_rate": 1.000608029444805e-06, "loss": 0.0412, "step": 8805 }, { "epoch": 2.4, "grad_norm": 1.4006742833955323, "learning_rate": 9.997237558174334e-07, "loss": 0.04, "step": 8806 }, { "epoch": 2.4, "grad_norm": 1.4129550328153881, "learning_rate": 9.988398296916569e-07, "loss": 0.0458, "step": 8807 }, { "epoch": 2.4, "grad_norm": 1.341890443918045, "learning_rate": 9.979562511442586e-07, "loss": 0.0484, "step": 8808 }, { "epoch": 2.4, "grad_norm": 1.4135832754950768, "learning_rate": 9.970730202519986e-07, "loss": 0.0423, "step": 8809 }, { "epoch": 2.41, "grad_norm": 1.373944204233886, "learning_rate": 9.961901370915994e-07, "loss": 0.0488, "step": 8810 }, { "epoch": 2.41, "grad_norm": 1.3851600214535538, "learning_rate": 9.953076017397579e-07, "loss": 0.041, "step": 8811 }, { "epoch": 2.41, "grad_norm": 1.2682180285044817, "learning_rate": 9.944254142731375e-07, "loss": 0.0408, "step": 8812 }, { "epoch": 2.41, "grad_norm": 1.3586509882649467, "learning_rate": 9.935435747683758e-07, "loss": 0.0475, "step": 8813 }, { "epoch": 2.41, "grad_norm": 1.5952142618839091, "learning_rate": 9.926620833020755e-07, "loss": 0.0555, "step": 8814 }, { "epoch": 2.41, "grad_norm": 1.4946890099049241, "learning_rate": 9.917809399508144e-07, "loss": 0.0453, "step": 8815 }, { "epoch": 2.41, "grad_norm": 1.2764898861252318, "learning_rate": 9.909001447911336e-07, "loss": 0.0419, "step": 8816 }, { "epoch": 2.41, "grad_norm": 1.6023593000761078, "learning_rate": 9.900196978995497e-07, "loss": 0.0479, "step": 8817 }, { "epoch": 2.41, "grad_norm": 1.4781385659474782, "learning_rate": 9.891395993525433e-07, "loss": 0.0491, "step": 8818 }, { "epoch": 2.41, "grad_norm": 1.5359673023133742, "learning_rate": 9.882598492265716e-07, "loss": 0.0489, "step": 8819 }, { "epoch": 2.41, "grad_norm": 1.2640913978210047, "learning_rate": 9.873804475980552e-07, "loss": 0.0361, "step": 8820 }, { "epoch": 2.41, "grad_norm": 1.4363770850360897, "learning_rate": 9.865013945433905e-07, "loss": 0.0451, "step": 8821 }, { "epoch": 2.41, "grad_norm": 1.3441206019644, "learning_rate": 9.856226901389376e-07, "loss": 0.0402, "step": 8822 }, { "epoch": 2.41, "grad_norm": 1.4839445448950859, "learning_rate": 9.847443344610296e-07, "loss": 0.0417, "step": 8823 }, { "epoch": 2.41, "grad_norm": 1.558399843938944, "learning_rate": 9.838663275859678e-07, "loss": 0.0507, "step": 8824 }, { "epoch": 2.41, "grad_norm": 1.5097129476885902, "learning_rate": 9.829886695900265e-07, "loss": 0.0467, "step": 8825 }, { "epoch": 2.41, "grad_norm": 1.4783404184696236, "learning_rate": 9.821113605494449e-07, "loss": 0.0451, "step": 8826 }, { "epoch": 2.41, "grad_norm": 1.3442201264781266, "learning_rate": 9.812344005404361e-07, "loss": 0.0397, "step": 8827 }, { "epoch": 2.41, "grad_norm": 1.3151017061671793, "learning_rate": 9.803577896391809e-07, "loss": 0.0369, "step": 8828 }, { "epoch": 2.41, "grad_norm": 1.7710901547375577, "learning_rate": 9.794815279218288e-07, "loss": 0.0521, "step": 8829 }, { "epoch": 2.41, "grad_norm": 1.483713453038063, "learning_rate": 9.786056154645001e-07, "loss": 0.052, "step": 8830 }, { "epoch": 2.41, "grad_norm": 1.4099590464477867, "learning_rate": 9.77730052343287e-07, "loss": 0.0489, "step": 8831 }, { "epoch": 2.41, "grad_norm": 1.6979591030207262, "learning_rate": 9.768548386342458e-07, "loss": 0.0527, "step": 8832 }, { "epoch": 2.41, "grad_norm": 1.6792590347500782, "learning_rate": 9.75979974413409e-07, "loss": 0.0555, "step": 8833 }, { "epoch": 2.41, "grad_norm": 1.076762898293552, "learning_rate": 9.751054597567744e-07, "loss": 0.0322, "step": 8834 }, { "epoch": 2.41, "grad_norm": 1.4642910314927935, "learning_rate": 9.742312947403103e-07, "loss": 0.049, "step": 8835 }, { "epoch": 2.41, "grad_norm": 1.4940258544132559, "learning_rate": 9.733574794399537e-07, "loss": 0.0444, "step": 8836 }, { "epoch": 2.41, "grad_norm": 1.4037131497400552, "learning_rate": 9.724840139316144e-07, "loss": 0.0421, "step": 8837 }, { "epoch": 2.41, "grad_norm": 1.5103223092410143, "learning_rate": 9.71610898291168e-07, "loss": 0.0479, "step": 8838 }, { "epoch": 2.41, "grad_norm": 1.2864339785832624, "learning_rate": 9.707381325944642e-07, "loss": 0.0416, "step": 8839 }, { "epoch": 2.41, "grad_norm": 1.3260510759892015, "learning_rate": 9.698657169173176e-07, "loss": 0.042, "step": 8840 }, { "epoch": 2.41, "grad_norm": 1.5442615736113157, "learning_rate": 9.689936513355147e-07, "loss": 0.0496, "step": 8841 }, { "epoch": 2.41, "grad_norm": 1.3643606418963607, "learning_rate": 9.681219359248106e-07, "loss": 0.0406, "step": 8842 }, { "epoch": 2.41, "grad_norm": 1.5220865572806, "learning_rate": 9.672505707609326e-07, "loss": 0.0537, "step": 8843 }, { "epoch": 2.41, "grad_norm": 1.182433106239767, "learning_rate": 9.663795559195733e-07, "loss": 0.0317, "step": 8844 }, { "epoch": 2.41, "grad_norm": 1.5795583322988773, "learning_rate": 9.655088914763994e-07, "loss": 0.0515, "step": 8845 }, { "epoch": 2.41, "grad_norm": 1.4861466924809996, "learning_rate": 9.646385775070444e-07, "loss": 0.0432, "step": 8846 }, { "epoch": 2.42, "grad_norm": 1.1290982155382336, "learning_rate": 9.637686140871121e-07, "loss": 0.0391, "step": 8847 }, { "epoch": 2.42, "grad_norm": 1.4827467735414195, "learning_rate": 9.628990012921734e-07, "loss": 0.0632, "step": 8848 }, { "epoch": 2.42, "grad_norm": 1.7924657188638018, "learning_rate": 9.620297391977746e-07, "loss": 0.0545, "step": 8849 }, { "epoch": 2.42, "grad_norm": 2.022742935537372, "learning_rate": 9.611608278794249e-07, "loss": 0.0553, "step": 8850 }, { "epoch": 2.42, "grad_norm": 1.4422068554644836, "learning_rate": 9.602922674126085e-07, "loss": 0.0394, "step": 8851 }, { "epoch": 2.42, "grad_norm": 1.6369553880332486, "learning_rate": 9.59424057872776e-07, "loss": 0.049, "step": 8852 }, { "epoch": 2.42, "grad_norm": 1.451703924426917, "learning_rate": 9.585561993353482e-07, "loss": 0.0425, "step": 8853 }, { "epoch": 2.42, "grad_norm": 1.3786391330358592, "learning_rate": 9.576886918757134e-07, "loss": 0.0425, "step": 8854 }, { "epoch": 2.42, "grad_norm": 1.483401282370702, "learning_rate": 9.568215355692351e-07, "loss": 0.0538, "step": 8855 }, { "epoch": 2.42, "grad_norm": 1.4605764005482178, "learning_rate": 9.559547304912392e-07, "loss": 0.0481, "step": 8856 }, { "epoch": 2.42, "grad_norm": 1.6277322929927311, "learning_rate": 9.550882767170278e-07, "loss": 0.0536, "step": 8857 }, { "epoch": 2.42, "grad_norm": 1.5463550556685373, "learning_rate": 9.54222174321867e-07, "loss": 0.0469, "step": 8858 }, { "epoch": 2.42, "grad_norm": 1.4353914517934903, "learning_rate": 9.533564233809939e-07, "loss": 0.0449, "step": 8859 }, { "epoch": 2.42, "grad_norm": 1.6869508507798658, "learning_rate": 9.524910239696189e-07, "loss": 0.0581, "step": 8860 }, { "epoch": 2.42, "grad_norm": 1.5780154260581238, "learning_rate": 9.516259761629148e-07, "loss": 0.0427, "step": 8861 }, { "epoch": 2.42, "grad_norm": 1.51502926948865, "learning_rate": 9.507612800360316e-07, "loss": 0.0464, "step": 8862 }, { "epoch": 2.42, "grad_norm": 1.5584709207979923, "learning_rate": 9.498969356640836e-07, "loss": 0.0492, "step": 8863 }, { "epoch": 2.42, "grad_norm": 1.4185636705077296, "learning_rate": 9.490329431221545e-07, "loss": 0.0489, "step": 8864 }, { "epoch": 2.42, "grad_norm": 1.4515765777261733, "learning_rate": 9.48169302485299e-07, "loss": 0.0475, "step": 8865 }, { "epoch": 2.42, "grad_norm": 1.7595135351921316, "learning_rate": 9.473060138285434e-07, "loss": 0.0643, "step": 8866 }, { "epoch": 2.42, "grad_norm": 1.4003256745507489, "learning_rate": 9.464430772268779e-07, "loss": 0.0437, "step": 8867 }, { "epoch": 2.42, "grad_norm": 1.388042495068433, "learning_rate": 9.455804927552681e-07, "loss": 0.0375, "step": 8868 }, { "epoch": 2.42, "grad_norm": 1.3559387656083617, "learning_rate": 9.447182604886446e-07, "loss": 0.0442, "step": 8869 }, { "epoch": 2.42, "grad_norm": 1.416570404498866, "learning_rate": 9.438563805019096e-07, "loss": 0.0386, "step": 8870 }, { "epoch": 2.42, "grad_norm": 1.705767330847675, "learning_rate": 9.429948528699329e-07, "loss": 0.0568, "step": 8871 }, { "epoch": 2.42, "grad_norm": 1.4819682440463875, "learning_rate": 9.421336776675565e-07, "loss": 0.0499, "step": 8872 }, { "epoch": 2.42, "grad_norm": 1.5690218538326075, "learning_rate": 9.412728549695888e-07, "loss": 0.0524, "step": 8873 }, { "epoch": 2.42, "grad_norm": 1.2435858016717076, "learning_rate": 9.404123848508107e-07, "loss": 0.0368, "step": 8874 }, { "epoch": 2.42, "grad_norm": 1.5186243940801072, "learning_rate": 9.395522673859698e-07, "loss": 0.0442, "step": 8875 }, { "epoch": 2.42, "grad_norm": 1.5427425080659716, "learning_rate": 9.386925026497835e-07, "loss": 0.0468, "step": 8876 }, { "epoch": 2.42, "grad_norm": 1.4019306326656193, "learning_rate": 9.378330907169387e-07, "loss": 0.0452, "step": 8877 }, { "epoch": 2.42, "grad_norm": 1.7366399206142773, "learning_rate": 9.369740316620935e-07, "loss": 0.056, "step": 8878 }, { "epoch": 2.42, "grad_norm": 1.1102651671172517, "learning_rate": 9.361153255598721e-07, "loss": 0.0349, "step": 8879 }, { "epoch": 2.42, "grad_norm": 1.7912278041817948, "learning_rate": 9.352569724848715e-07, "loss": 0.0573, "step": 8880 }, { "epoch": 2.42, "grad_norm": 1.4831430918563488, "learning_rate": 9.34398972511656e-07, "loss": 0.0582, "step": 8881 }, { "epoch": 2.42, "grad_norm": 1.4082439020614645, "learning_rate": 9.33541325714759e-07, "loss": 0.0406, "step": 8882 }, { "epoch": 2.43, "grad_norm": 1.5344946218336881, "learning_rate": 9.326840321686826e-07, "loss": 0.0437, "step": 8883 }, { "epoch": 2.43, "grad_norm": 1.557982352612514, "learning_rate": 9.318270919479022e-07, "loss": 0.0477, "step": 8884 }, { "epoch": 2.43, "grad_norm": 1.2931425737049431, "learning_rate": 9.309705051268564e-07, "loss": 0.0437, "step": 8885 }, { "epoch": 2.43, "grad_norm": 1.3837348866786165, "learning_rate": 9.301142717799594e-07, "loss": 0.0424, "step": 8886 }, { "epoch": 2.43, "grad_norm": 1.6136291831205603, "learning_rate": 9.292583919815906e-07, "loss": 0.0536, "step": 8887 }, { "epoch": 2.43, "grad_norm": 1.2269054288742103, "learning_rate": 9.284028658060995e-07, "loss": 0.0365, "step": 8888 }, { "epoch": 2.43, "grad_norm": 1.7875405393852057, "learning_rate": 9.275476933278038e-07, "loss": 0.058, "step": 8889 }, { "epoch": 2.43, "grad_norm": 1.405820008470654, "learning_rate": 9.266928746209946e-07, "loss": 0.0438, "step": 8890 }, { "epoch": 2.43, "grad_norm": 1.3503676716642974, "learning_rate": 9.258384097599266e-07, "loss": 0.0328, "step": 8891 }, { "epoch": 2.43, "grad_norm": 1.66583858820306, "learning_rate": 9.249842988188295e-07, "loss": 0.0446, "step": 8892 }, { "epoch": 2.43, "grad_norm": 1.2711118931266623, "learning_rate": 9.241305418718982e-07, "loss": 0.0378, "step": 8893 }, { "epoch": 2.43, "grad_norm": 1.555801336235142, "learning_rate": 9.232771389932976e-07, "loss": 0.0459, "step": 8894 }, { "epoch": 2.43, "grad_norm": 1.480189942479658, "learning_rate": 9.224240902571618e-07, "loss": 0.043, "step": 8895 }, { "epoch": 2.43, "grad_norm": 1.4656811459885832, "learning_rate": 9.215713957375961e-07, "loss": 0.0454, "step": 8896 }, { "epoch": 2.43, "grad_norm": 1.4912856411713014, "learning_rate": 9.20719055508672e-07, "loss": 0.0451, "step": 8897 }, { "epoch": 2.43, "grad_norm": 1.5684205187235856, "learning_rate": 9.198670696444339e-07, "loss": 0.0474, "step": 8898 }, { "epoch": 2.43, "grad_norm": 1.3387273689649501, "learning_rate": 9.190154382188921e-07, "loss": 0.0445, "step": 8899 }, { "epoch": 2.43, "grad_norm": 1.7424405804316927, "learning_rate": 9.181641613060271e-07, "loss": 0.0591, "step": 8900 }, { "epoch": 2.43, "grad_norm": 1.2953173454594806, "learning_rate": 9.173132389797878e-07, "loss": 0.0313, "step": 8901 }, { "epoch": 2.43, "grad_norm": 1.5458891993022181, "learning_rate": 9.164626713140956e-07, "loss": 0.0477, "step": 8902 }, { "epoch": 2.43, "grad_norm": 1.958214775298957, "learning_rate": 9.156124583828368e-07, "loss": 0.0561, "step": 8903 }, { "epoch": 2.43, "grad_norm": 1.5415768556482872, "learning_rate": 9.147626002598708e-07, "loss": 0.0568, "step": 8904 }, { "epoch": 2.43, "grad_norm": 1.7458338192573024, "learning_rate": 9.139130970190235e-07, "loss": 0.0502, "step": 8905 }, { "epoch": 2.43, "grad_norm": 1.33948263990128, "learning_rate": 9.130639487340903e-07, "loss": 0.0429, "step": 8906 }, { "epoch": 2.43, "grad_norm": 1.4869310372449824, "learning_rate": 9.12215155478835e-07, "loss": 0.0444, "step": 8907 }, { "epoch": 2.43, "grad_norm": 1.6133436769444875, "learning_rate": 9.113667173269947e-07, "loss": 0.0486, "step": 8908 }, { "epoch": 2.43, "grad_norm": 1.4643045993720862, "learning_rate": 9.105186343522698e-07, "loss": 0.0412, "step": 8909 }, { "epoch": 2.43, "grad_norm": 1.3937916003743358, "learning_rate": 9.096709066283355e-07, "loss": 0.0449, "step": 8910 }, { "epoch": 2.43, "grad_norm": 1.5561828186758775, "learning_rate": 9.088235342288315e-07, "loss": 0.047, "step": 8911 }, { "epoch": 2.43, "grad_norm": 1.6725011260253135, "learning_rate": 9.079765172273697e-07, "loss": 0.0494, "step": 8912 }, { "epoch": 2.43, "grad_norm": 1.676783555269748, "learning_rate": 9.071298556975278e-07, "loss": 0.0474, "step": 8913 }, { "epoch": 2.43, "grad_norm": 1.398297046029956, "learning_rate": 9.062835497128575e-07, "loss": 0.0438, "step": 8914 }, { "epoch": 2.43, "grad_norm": 1.3314652328600187, "learning_rate": 9.054375993468745e-07, "loss": 0.0369, "step": 8915 }, { "epoch": 2.43, "grad_norm": 1.7447067324934413, "learning_rate": 9.045920046730683e-07, "loss": 0.0576, "step": 8916 }, { "epoch": 2.43, "grad_norm": 1.3778030108000554, "learning_rate": 9.037467657648941e-07, "loss": 0.033, "step": 8917 }, { "epoch": 2.43, "grad_norm": 1.682401234750559, "learning_rate": 9.029018826957775e-07, "loss": 0.0468, "step": 8918 }, { "epoch": 2.43, "grad_norm": 1.97998012338404, "learning_rate": 9.020573555391116e-07, "loss": 0.058, "step": 8919 }, { "epoch": 2.44, "grad_norm": 1.5185950448877157, "learning_rate": 9.01213184368262e-07, "loss": 0.0477, "step": 8920 }, { "epoch": 2.44, "grad_norm": 1.3077831073047794, "learning_rate": 9.00369369256559e-07, "loss": 0.0444, "step": 8921 }, { "epoch": 2.44, "grad_norm": 1.258866649604549, "learning_rate": 8.99525910277308e-07, "loss": 0.0378, "step": 8922 }, { "epoch": 2.44, "grad_norm": 1.496496291836476, "learning_rate": 8.986828075037768e-07, "loss": 0.048, "step": 8923 }, { "epoch": 2.44, "grad_norm": 1.743012011117733, "learning_rate": 8.978400610092058e-07, "loss": 0.0555, "step": 8924 }, { "epoch": 2.44, "grad_norm": 1.7607407885442508, "learning_rate": 8.969976708668032e-07, "loss": 0.0469, "step": 8925 }, { "epoch": 2.44, "grad_norm": 1.606959196856989, "learning_rate": 8.961556371497493e-07, "loss": 0.0464, "step": 8926 }, { "epoch": 2.44, "grad_norm": 1.4554689227465931, "learning_rate": 8.953139599311883e-07, "loss": 0.0414, "step": 8927 }, { "epoch": 2.44, "grad_norm": 1.27526330763992, "learning_rate": 8.944726392842385e-07, "loss": 0.047, "step": 8928 }, { "epoch": 2.44, "grad_norm": 1.6510390643667272, "learning_rate": 8.936316752819834e-07, "loss": 0.0507, "step": 8929 }, { "epoch": 2.44, "grad_norm": 1.4372799593863448, "learning_rate": 8.927910679974783e-07, "loss": 0.0496, "step": 8930 }, { "epoch": 2.44, "grad_norm": 1.455611894118685, "learning_rate": 8.919508175037439e-07, "loss": 0.0409, "step": 8931 }, { "epoch": 2.44, "grad_norm": 1.5521467655265355, "learning_rate": 8.911109238737748e-07, "loss": 0.0536, "step": 8932 }, { "epoch": 2.44, "grad_norm": 1.5023983141121513, "learning_rate": 8.902713871805302e-07, "loss": 0.0467, "step": 8933 }, { "epoch": 2.44, "grad_norm": 1.7411531547120003, "learning_rate": 8.894322074969419e-07, "loss": 0.0536, "step": 8934 }, { "epoch": 2.44, "grad_norm": 1.3954188702441621, "learning_rate": 8.885933848959083e-07, "loss": 0.0461, "step": 8935 }, { "epoch": 2.44, "grad_norm": 1.2754190422906153, "learning_rate": 8.877549194502972e-07, "loss": 0.0377, "step": 8936 }, { "epoch": 2.44, "grad_norm": 1.5800340367118, "learning_rate": 8.86916811232944e-07, "loss": 0.0551, "step": 8937 }, { "epoch": 2.44, "grad_norm": 1.467730972761269, "learning_rate": 8.86079060316658e-07, "loss": 0.0557, "step": 8938 }, { "epoch": 2.44, "grad_norm": 1.4660268590982395, "learning_rate": 8.852416667742108e-07, "loss": 0.0429, "step": 8939 }, { "epoch": 2.44, "grad_norm": 1.5712180374033295, "learning_rate": 8.844046306783488e-07, "loss": 0.0412, "step": 8940 }, { "epoch": 2.44, "grad_norm": 1.4608688824960407, "learning_rate": 8.835679521017842e-07, "loss": 0.0433, "step": 8941 }, { "epoch": 2.44, "grad_norm": 1.497105914919798, "learning_rate": 8.827316311171986e-07, "loss": 0.0477, "step": 8942 }, { "epoch": 2.44, "grad_norm": 1.5381419947103372, "learning_rate": 8.818956677972407e-07, "loss": 0.0498, "step": 8943 }, { "epoch": 2.44, "grad_norm": 1.5855770823760886, "learning_rate": 8.810600622145337e-07, "loss": 0.054, "step": 8944 }, { "epoch": 2.44, "grad_norm": 1.4341688561111856, "learning_rate": 8.802248144416625e-07, "loss": 0.0416, "step": 8945 }, { "epoch": 2.44, "grad_norm": 1.6459914327100782, "learning_rate": 8.793899245511884e-07, "loss": 0.0467, "step": 8946 }, { "epoch": 2.44, "grad_norm": 1.3821434203234158, "learning_rate": 8.785553926156354e-07, "loss": 0.047, "step": 8947 }, { "epoch": 2.44, "grad_norm": 1.4548495232975707, "learning_rate": 8.777212187074996e-07, "loss": 0.0447, "step": 8948 }, { "epoch": 2.44, "grad_norm": 1.551450707363934, "learning_rate": 8.768874028992431e-07, "loss": 0.0438, "step": 8949 }, { "epoch": 2.44, "grad_norm": 1.513944080432506, "learning_rate": 8.76053945263301e-07, "loss": 0.0489, "step": 8950 }, { "epoch": 2.44, "grad_norm": 1.3817290415720977, "learning_rate": 8.752208458720762e-07, "loss": 0.0432, "step": 8951 }, { "epoch": 2.44, "grad_norm": 1.346948102230618, "learning_rate": 8.743881047979381e-07, "loss": 0.0413, "step": 8952 }, { "epoch": 2.44, "grad_norm": 1.513365042625213, "learning_rate": 8.735557221132268e-07, "loss": 0.0474, "step": 8953 }, { "epoch": 2.44, "grad_norm": 1.440045238603853, "learning_rate": 8.727236978902492e-07, "loss": 0.0438, "step": 8954 }, { "epoch": 2.44, "grad_norm": 1.4428019679273554, "learning_rate": 8.718920322012858e-07, "loss": 0.0468, "step": 8955 }, { "epoch": 2.44, "grad_norm": 1.479941510035178, "learning_rate": 8.710607251185799e-07, "loss": 0.0432, "step": 8956 }, { "epoch": 2.45, "grad_norm": 1.4818894274913803, "learning_rate": 8.702297767143497e-07, "loss": 0.0361, "step": 8957 }, { "epoch": 2.45, "grad_norm": 1.4974284264178659, "learning_rate": 8.693991870607771e-07, "loss": 0.0475, "step": 8958 }, { "epoch": 2.45, "grad_norm": 1.4934142023669315, "learning_rate": 8.685689562300159e-07, "loss": 0.0525, "step": 8959 }, { "epoch": 2.45, "grad_norm": 1.6622345692125744, "learning_rate": 8.677390842941857e-07, "loss": 0.0502, "step": 8960 }, { "epoch": 2.45, "grad_norm": 1.606256589133991, "learning_rate": 8.669095713253795e-07, "loss": 0.0444, "step": 8961 }, { "epoch": 2.45, "grad_norm": 1.5078717472467353, "learning_rate": 8.66080417395655e-07, "loss": 0.0546, "step": 8962 }, { "epoch": 2.45, "grad_norm": 1.4019987152525188, "learning_rate": 8.652516225770419e-07, "loss": 0.0435, "step": 8963 }, { "epoch": 2.45, "grad_norm": 1.3791596253621285, "learning_rate": 8.64423186941536e-07, "loss": 0.0432, "step": 8964 }, { "epoch": 2.45, "grad_norm": 1.4422979317113884, "learning_rate": 8.635951105611035e-07, "loss": 0.041, "step": 8965 }, { "epoch": 2.45, "grad_norm": 1.3701412877269576, "learning_rate": 8.627673935076769e-07, "loss": 0.0449, "step": 8966 }, { "epoch": 2.45, "grad_norm": 1.3285394799292558, "learning_rate": 8.619400358531626e-07, "loss": 0.0422, "step": 8967 }, { "epoch": 2.45, "grad_norm": 1.241747993182049, "learning_rate": 8.611130376694299e-07, "loss": 0.0376, "step": 8968 }, { "epoch": 2.45, "grad_norm": 1.662961027116381, "learning_rate": 8.602863990283217e-07, "loss": 0.0432, "step": 8969 }, { "epoch": 2.45, "grad_norm": 1.4828059178672184, "learning_rate": 8.594601200016472e-07, "loss": 0.0463, "step": 8970 }, { "epoch": 2.45, "grad_norm": 1.5003132376295374, "learning_rate": 8.586342006611847e-07, "loss": 0.0482, "step": 8971 }, { "epoch": 2.45, "grad_norm": 1.4228374227419227, "learning_rate": 8.578086410786796e-07, "loss": 0.0496, "step": 8972 }, { "epoch": 2.45, "grad_norm": 1.4029162320759927, "learning_rate": 8.569834413258505e-07, "loss": 0.0418, "step": 8973 }, { "epoch": 2.45, "grad_norm": 1.3007415715774118, "learning_rate": 8.561586014743789e-07, "loss": 0.0433, "step": 8974 }, { "epoch": 2.45, "grad_norm": 1.4906295592309138, "learning_rate": 8.553341215959215e-07, "loss": 0.0408, "step": 8975 }, { "epoch": 2.45, "grad_norm": 1.5469424546303432, "learning_rate": 8.545100017620988e-07, "loss": 0.0528, "step": 8976 }, { "epoch": 2.45, "grad_norm": 1.4790081689765433, "learning_rate": 8.536862420445019e-07, "loss": 0.0411, "step": 8977 }, { "epoch": 2.45, "grad_norm": 1.325325073211631, "learning_rate": 8.528628425146885e-07, "loss": 0.0438, "step": 8978 }, { "epoch": 2.45, "grad_norm": 1.6423657308090662, "learning_rate": 8.520398032441896e-07, "loss": 0.0475, "step": 8979 }, { "epoch": 2.45, "grad_norm": 1.8370377242915266, "learning_rate": 8.512171243044992e-07, "loss": 0.0464, "step": 8980 }, { "epoch": 2.45, "grad_norm": 1.3428002091129605, "learning_rate": 8.503948057670863e-07, "loss": 0.0363, "step": 8981 }, { "epoch": 2.45, "grad_norm": 1.5992235106442614, "learning_rate": 8.495728477033832e-07, "loss": 0.0529, "step": 8982 }, { "epoch": 2.45, "grad_norm": 1.5251309877657038, "learning_rate": 8.487512501847933e-07, "loss": 0.049, "step": 8983 }, { "epoch": 2.45, "grad_norm": 1.3934953433798714, "learning_rate": 8.479300132826873e-07, "loss": 0.0387, "step": 8984 }, { "epoch": 2.45, "grad_norm": 1.656173642326572, "learning_rate": 8.47109137068407e-07, "loss": 0.0524, "step": 8985 }, { "epoch": 2.45, "grad_norm": 1.600286517758158, "learning_rate": 8.462886216132604e-07, "loss": 0.0581, "step": 8986 }, { "epoch": 2.45, "grad_norm": 1.5065598208541373, "learning_rate": 8.45468466988526e-07, "loss": 0.0485, "step": 8987 }, { "epoch": 2.45, "grad_norm": 1.5658449333804347, "learning_rate": 8.446486732654508e-07, "loss": 0.0455, "step": 8988 }, { "epoch": 2.45, "grad_norm": 1.3885582390485607, "learning_rate": 8.438292405152477e-07, "loss": 0.0517, "step": 8989 }, { "epoch": 2.45, "grad_norm": 1.338702883809766, "learning_rate": 8.430101688091009e-07, "loss": 0.0377, "step": 8990 }, { "epoch": 2.45, "grad_norm": 1.4501031663604222, "learning_rate": 8.421914582181639e-07, "loss": 0.0484, "step": 8991 }, { "epoch": 2.45, "grad_norm": 1.606599166203397, "learning_rate": 8.413731088135563e-07, "loss": 0.049, "step": 8992 }, { "epoch": 2.46, "grad_norm": 1.6099786011135102, "learning_rate": 8.405551206663686e-07, "loss": 0.0443, "step": 8993 }, { "epoch": 2.46, "grad_norm": 1.4472703398053017, "learning_rate": 8.397374938476594e-07, "loss": 0.0442, "step": 8994 }, { "epoch": 2.46, "grad_norm": 1.4943388990749984, "learning_rate": 8.389202284284536e-07, "loss": 0.0476, "step": 8995 }, { "epoch": 2.46, "grad_norm": 1.511444428661888, "learning_rate": 8.38103324479747e-07, "loss": 0.0504, "step": 8996 }, { "epoch": 2.46, "grad_norm": 1.363267847532087, "learning_rate": 8.37286782072505e-07, "loss": 0.0358, "step": 8997 }, { "epoch": 2.46, "grad_norm": 1.4133418350198572, "learning_rate": 8.36470601277658e-07, "loss": 0.0435, "step": 8998 }, { "epoch": 2.46, "grad_norm": 1.6288271493226334, "learning_rate": 8.356547821661098e-07, "loss": 0.0548, "step": 8999 }, { "epoch": 2.46, "grad_norm": 1.6517041938394745, "learning_rate": 8.348393248087289e-07, "loss": 0.0503, "step": 9000 }, { "epoch": 2.46, "grad_norm": 1.2574762738780376, "learning_rate": 8.340242292763529e-07, "loss": 0.0397, "step": 9001 }, { "epoch": 2.46, "grad_norm": 1.3575275716684383, "learning_rate": 8.33209495639788e-07, "loss": 0.039, "step": 9002 }, { "epoch": 2.46, "grad_norm": 1.4252422645237195, "learning_rate": 8.323951239698119e-07, "loss": 0.0489, "step": 9003 }, { "epoch": 2.46, "grad_norm": 1.5413535320784262, "learning_rate": 8.315811143371666e-07, "loss": 0.0487, "step": 9004 }, { "epoch": 2.46, "grad_norm": 1.541034198856851, "learning_rate": 8.307674668125665e-07, "loss": 0.0444, "step": 9005 }, { "epoch": 2.46, "grad_norm": 1.2813976530781592, "learning_rate": 8.299541814666917e-07, "loss": 0.0371, "step": 9006 }, { "epoch": 2.46, "grad_norm": 1.6958676468289544, "learning_rate": 8.291412583701913e-07, "loss": 0.0501, "step": 9007 }, { "epoch": 2.46, "grad_norm": 1.5297521809715184, "learning_rate": 8.283286975936833e-07, "loss": 0.0476, "step": 9008 }, { "epoch": 2.46, "grad_norm": 1.4493495446458038, "learning_rate": 8.275164992077555e-07, "loss": 0.0458, "step": 9009 }, { "epoch": 2.46, "grad_norm": 1.4896130776669245, "learning_rate": 8.267046632829618e-07, "loss": 0.0491, "step": 9010 }, { "epoch": 2.46, "grad_norm": 1.6503397423052593, "learning_rate": 8.258931898898276e-07, "loss": 0.0546, "step": 9011 }, { "epoch": 2.46, "grad_norm": 1.5549274654388119, "learning_rate": 8.250820790988446e-07, "loss": 0.0501, "step": 9012 }, { "epoch": 2.46, "grad_norm": 1.358183941349689, "learning_rate": 8.242713309804729e-07, "loss": 0.0404, "step": 9013 }, { "epoch": 2.46, "grad_norm": 1.3940337228855835, "learning_rate": 8.234609456051402e-07, "loss": 0.0439, "step": 9014 }, { "epoch": 2.46, "grad_norm": 1.5995225619911047, "learning_rate": 8.226509230432472e-07, "loss": 0.0459, "step": 9015 }, { "epoch": 2.46, "grad_norm": 1.4005265088750665, "learning_rate": 8.218412633651579e-07, "loss": 0.0383, "step": 9016 }, { "epoch": 2.46, "grad_norm": 1.231710447697946, "learning_rate": 8.210319666412087e-07, "loss": 0.0409, "step": 9017 }, { "epoch": 2.46, "grad_norm": 1.5731805999632344, "learning_rate": 8.202230329417016e-07, "loss": 0.0511, "step": 9018 }, { "epoch": 2.46, "grad_norm": 1.6128783761907122, "learning_rate": 8.194144623369083e-07, "loss": 0.0462, "step": 9019 }, { "epoch": 2.46, "grad_norm": 1.6316539814389979, "learning_rate": 8.18606254897068e-07, "loss": 0.0526, "step": 9020 }, { "epoch": 2.46, "grad_norm": 1.4191188583693426, "learning_rate": 8.177984106923914e-07, "loss": 0.0456, "step": 9021 }, { "epoch": 2.46, "grad_norm": 1.5408719925757557, "learning_rate": 8.169909297930528e-07, "loss": 0.0546, "step": 9022 }, { "epoch": 2.46, "grad_norm": 1.3613786770416876, "learning_rate": 8.161838122692e-07, "loss": 0.0379, "step": 9023 }, { "epoch": 2.46, "grad_norm": 1.4965932979154073, "learning_rate": 8.15377058190946e-07, "loss": 0.0443, "step": 9024 }, { "epoch": 2.46, "grad_norm": 1.5226486809817725, "learning_rate": 8.145706676283727e-07, "loss": 0.0459, "step": 9025 }, { "epoch": 2.46, "grad_norm": 1.5170960817361812, "learning_rate": 8.137646406515293e-07, "loss": 0.0515, "step": 9026 }, { "epoch": 2.46, "grad_norm": 1.215493483329827, "learning_rate": 8.129589773304381e-07, "loss": 0.0393, "step": 9027 }, { "epoch": 2.46, "grad_norm": 1.5544798075202735, "learning_rate": 8.121536777350836e-07, "loss": 0.0444, "step": 9028 }, { "epoch": 2.46, "grad_norm": 1.3790009615106615, "learning_rate": 8.113487419354244e-07, "loss": 0.0421, "step": 9029 }, { "epoch": 2.47, "grad_norm": 1.6083808501873749, "learning_rate": 8.105441700013827e-07, "loss": 0.046, "step": 9030 }, { "epoch": 2.47, "grad_norm": 1.4936871187777547, "learning_rate": 8.097399620028523e-07, "loss": 0.0445, "step": 9031 }, { "epoch": 2.47, "grad_norm": 1.591663116355617, "learning_rate": 8.089361180096927e-07, "loss": 0.0495, "step": 9032 }, { "epoch": 2.47, "grad_norm": 1.5017803024250382, "learning_rate": 8.08132638091736e-07, "loss": 0.0482, "step": 9033 }, { "epoch": 2.47, "grad_norm": 1.4127755181942563, "learning_rate": 8.073295223187766e-07, "loss": 0.0349, "step": 9034 }, { "epoch": 2.47, "grad_norm": 1.1853239417764114, "learning_rate": 8.06526770760584e-07, "loss": 0.0384, "step": 9035 }, { "epoch": 2.47, "grad_norm": 1.3324437910311249, "learning_rate": 8.057243834868916e-07, "loss": 0.0436, "step": 9036 }, { "epoch": 2.47, "grad_norm": 1.1707072770807363, "learning_rate": 8.049223605674023e-07, "loss": 0.0361, "step": 9037 }, { "epoch": 2.47, "grad_norm": 4.451944514938382, "learning_rate": 8.041207020717851e-07, "loss": 0.0526, "step": 9038 }, { "epoch": 2.47, "grad_norm": 1.287628025223005, "learning_rate": 8.033194080696833e-07, "loss": 0.0408, "step": 9039 }, { "epoch": 2.47, "grad_norm": 1.5210471715688836, "learning_rate": 8.025184786307016e-07, "loss": 0.0422, "step": 9040 }, { "epoch": 2.47, "grad_norm": 1.2261789493315818, "learning_rate": 8.017179138244191e-07, "loss": 0.0347, "step": 9041 }, { "epoch": 2.47, "grad_norm": 1.4218751493565884, "learning_rate": 8.009177137203794e-07, "loss": 0.0407, "step": 9042 }, { "epoch": 2.47, "grad_norm": 1.4753775062765253, "learning_rate": 8.001178783880936e-07, "loss": 0.0479, "step": 9043 }, { "epoch": 2.47, "grad_norm": 1.3520247096842597, "learning_rate": 7.99318407897045e-07, "loss": 0.0423, "step": 9044 }, { "epoch": 2.47, "grad_norm": 1.758429802604417, "learning_rate": 7.985193023166821e-07, "loss": 0.0454, "step": 9045 }, { "epoch": 2.47, "grad_norm": 1.1879696122580803, "learning_rate": 7.977205617164241e-07, "loss": 0.0358, "step": 9046 }, { "epoch": 2.47, "grad_norm": 1.5109030996365918, "learning_rate": 7.969221861656557e-07, "loss": 0.0445, "step": 9047 }, { "epoch": 2.47, "grad_norm": 1.4234089690038207, "learning_rate": 7.961241757337324e-07, "loss": 0.0469, "step": 9048 }, { "epoch": 2.47, "grad_norm": 1.6390994822687233, "learning_rate": 7.953265304899743e-07, "loss": 0.0455, "step": 9049 }, { "epoch": 2.47, "grad_norm": 1.3827650030811074, "learning_rate": 7.945292505036762e-07, "loss": 0.0401, "step": 9050 }, { "epoch": 2.47, "grad_norm": 1.5179390231928465, "learning_rate": 7.937323358440935e-07, "loss": 0.038, "step": 9051 }, { "epoch": 2.47, "grad_norm": 1.4298489588422658, "learning_rate": 7.929357865804571e-07, "loss": 0.0457, "step": 9052 }, { "epoch": 2.47, "grad_norm": 1.5887061713564796, "learning_rate": 7.921396027819616e-07, "loss": 0.0511, "step": 9053 }, { "epoch": 2.47, "grad_norm": 1.6842889479197347, "learning_rate": 7.913437845177701e-07, "loss": 0.0569, "step": 9054 }, { "epoch": 2.47, "grad_norm": 1.4826350381534015, "learning_rate": 7.905483318570145e-07, "loss": 0.0501, "step": 9055 }, { "epoch": 2.47, "grad_norm": 1.4449163465246155, "learning_rate": 7.897532448687978e-07, "loss": 0.0468, "step": 9056 }, { "epoch": 2.47, "grad_norm": 1.364153784443725, "learning_rate": 7.889585236221853e-07, "loss": 0.0422, "step": 9057 }, { "epoch": 2.47, "grad_norm": 1.1839617974893517, "learning_rate": 7.881641681862173e-07, "loss": 0.0372, "step": 9058 }, { "epoch": 2.47, "grad_norm": 1.4551291759469345, "learning_rate": 7.873701786298976e-07, "loss": 0.0468, "step": 9059 }, { "epoch": 2.47, "grad_norm": 1.7694456129801226, "learning_rate": 7.865765550221993e-07, "loss": 0.0532, "step": 9060 }, { "epoch": 2.47, "grad_norm": 1.595986510898096, "learning_rate": 7.857832974320634e-07, "loss": 0.0485, "step": 9061 }, { "epoch": 2.47, "grad_norm": 1.333406306805132, "learning_rate": 7.849904059284014e-07, "loss": 0.038, "step": 9062 }, { "epoch": 2.47, "grad_norm": 1.4310867823210012, "learning_rate": 7.841978805800887e-07, "loss": 0.0435, "step": 9063 }, { "epoch": 2.47, "grad_norm": 1.5610426955813441, "learning_rate": 7.834057214559749e-07, "loss": 0.0438, "step": 9064 }, { "epoch": 2.47, "grad_norm": 1.3302655459612682, "learning_rate": 7.82613928624873e-07, "loss": 0.0368, "step": 9065 }, { "epoch": 2.48, "grad_norm": 1.3312887321039264, "learning_rate": 7.818225021555648e-07, "loss": 0.0439, "step": 9066 }, { "epoch": 2.48, "grad_norm": 1.4530976531464472, "learning_rate": 7.810314421168003e-07, "loss": 0.047, "step": 9067 }, { "epoch": 2.48, "grad_norm": 1.4705509609400045, "learning_rate": 7.802407485773011e-07, "loss": 0.0456, "step": 9068 }, { "epoch": 2.48, "grad_norm": 1.290940710341443, "learning_rate": 7.794504216057513e-07, "loss": 0.0404, "step": 9069 }, { "epoch": 2.48, "grad_norm": 1.5746991313637482, "learning_rate": 7.786604612708093e-07, "loss": 0.0461, "step": 9070 }, { "epoch": 2.48, "grad_norm": 1.317041374842992, "learning_rate": 7.778708676410962e-07, "loss": 0.0435, "step": 9071 }, { "epoch": 2.48, "grad_norm": 1.5982556350044739, "learning_rate": 7.770816407852045e-07, "loss": 0.05, "step": 9072 }, { "epoch": 2.48, "grad_norm": 1.4054662972230303, "learning_rate": 7.762927807716925e-07, "loss": 0.0498, "step": 9073 }, { "epoch": 2.48, "grad_norm": 1.6816352853804117, "learning_rate": 7.755042876690893e-07, "loss": 0.0434, "step": 9074 }, { "epoch": 2.48, "grad_norm": 1.5184365092681416, "learning_rate": 7.747161615458903e-07, "loss": 0.049, "step": 9075 }, { "epoch": 2.48, "grad_norm": 1.5790024943273662, "learning_rate": 7.739284024705601e-07, "loss": 0.0481, "step": 9076 }, { "epoch": 2.48, "grad_norm": 1.5601657919042449, "learning_rate": 7.731410105115311e-07, "loss": 0.0438, "step": 9077 }, { "epoch": 2.48, "grad_norm": 1.4016034601639242, "learning_rate": 7.723539857372026e-07, "loss": 0.043, "step": 9078 }, { "epoch": 2.48, "grad_norm": 1.2513766785266978, "learning_rate": 7.715673282159425e-07, "loss": 0.042, "step": 9079 }, { "epoch": 2.48, "grad_norm": 1.3304815997111177, "learning_rate": 7.707810380160891e-07, "loss": 0.0428, "step": 9080 }, { "epoch": 2.48, "grad_norm": 1.4750189006759722, "learning_rate": 7.699951152059448e-07, "loss": 0.0465, "step": 9081 }, { "epoch": 2.48, "grad_norm": 1.2760917233304985, "learning_rate": 7.692095598537847e-07, "loss": 0.0418, "step": 9082 }, { "epoch": 2.48, "grad_norm": 1.5261198510865022, "learning_rate": 7.684243720278478e-07, "loss": 0.0432, "step": 9083 }, { "epoch": 2.48, "grad_norm": 1.7639380720526652, "learning_rate": 7.676395517963436e-07, "loss": 0.0427, "step": 9084 }, { "epoch": 2.48, "grad_norm": 1.433297945762989, "learning_rate": 7.668550992274476e-07, "loss": 0.0453, "step": 9085 }, { "epoch": 2.48, "grad_norm": 1.522719045783516, "learning_rate": 7.660710143893069e-07, "loss": 0.0463, "step": 9086 }, { "epoch": 2.48, "grad_norm": 1.6697006447180247, "learning_rate": 7.652872973500325e-07, "loss": 0.0496, "step": 9087 }, { "epoch": 2.48, "grad_norm": 1.6728096140500466, "learning_rate": 7.645039481777073e-07, "loss": 0.055, "step": 9088 }, { "epoch": 2.48, "grad_norm": 1.5890631151395276, "learning_rate": 7.637209669403789e-07, "loss": 0.0534, "step": 9089 }, { "epoch": 2.48, "grad_norm": 1.655969852936737, "learning_rate": 7.629383537060653e-07, "loss": 0.0412, "step": 9090 }, { "epoch": 2.48, "grad_norm": 1.7967731667136608, "learning_rate": 7.621561085427503e-07, "loss": 0.05, "step": 9091 }, { "epoch": 2.48, "grad_norm": 1.7604609885702558, "learning_rate": 7.613742315183887e-07, "loss": 0.0458, "step": 9092 }, { "epoch": 2.48, "grad_norm": 1.8146974337203476, "learning_rate": 7.605927227009002e-07, "loss": 0.0504, "step": 9093 }, { "epoch": 2.48, "grad_norm": 1.6356878186592496, "learning_rate": 7.598115821581759e-07, "loss": 0.0497, "step": 9094 }, { "epoch": 2.48, "grad_norm": 1.493286672819636, "learning_rate": 7.590308099580718e-07, "loss": 0.0519, "step": 9095 }, { "epoch": 2.48, "grad_norm": 1.6021212748673392, "learning_rate": 7.582504061684131e-07, "loss": 0.0583, "step": 9096 }, { "epoch": 2.48, "grad_norm": 1.6090582071614288, "learning_rate": 7.57470370856992e-07, "loss": 0.0551, "step": 9097 }, { "epoch": 2.48, "grad_norm": 1.3090115400577307, "learning_rate": 7.566907040915721e-07, "loss": 0.0458, "step": 9098 }, { "epoch": 2.48, "grad_norm": 1.3645653663675927, "learning_rate": 7.559114059398804e-07, "loss": 0.0478, "step": 9099 }, { "epoch": 2.48, "grad_norm": 1.6990392092582034, "learning_rate": 7.551324764696155e-07, "loss": 0.0519, "step": 9100 }, { "epoch": 2.48, "grad_norm": 1.477471738769153, "learning_rate": 7.543539157484425e-07, "loss": 0.0479, "step": 9101 }, { "epoch": 2.48, "grad_norm": 1.3260412189248787, "learning_rate": 7.535757238439939e-07, "loss": 0.0385, "step": 9102 }, { "epoch": 2.49, "grad_norm": 1.4482554309030768, "learning_rate": 7.527979008238695e-07, "loss": 0.049, "step": 9103 }, { "epoch": 2.49, "grad_norm": 1.507983976866288, "learning_rate": 7.520204467556407e-07, "loss": 0.0417, "step": 9104 }, { "epoch": 2.49, "grad_norm": 1.7837008751581924, "learning_rate": 7.512433617068426e-07, "loss": 0.0512, "step": 9105 }, { "epoch": 2.49, "grad_norm": 1.5918365709689872, "learning_rate": 7.504666457449822e-07, "loss": 0.0539, "step": 9106 }, { "epoch": 2.49, "grad_norm": 1.9112005434083585, "learning_rate": 7.49690298937531e-07, "loss": 0.0569, "step": 9107 }, { "epoch": 2.49, "grad_norm": 1.7623075250190148, "learning_rate": 7.489143213519301e-07, "loss": 0.0487, "step": 9108 }, { "epoch": 2.49, "grad_norm": 1.2710173598255867, "learning_rate": 7.481387130555868e-07, "loss": 0.0388, "step": 9109 }, { "epoch": 2.49, "grad_norm": 1.4368405095243892, "learning_rate": 7.473634741158797e-07, "loss": 0.0479, "step": 9110 }, { "epoch": 2.49, "grad_norm": 1.586905853660764, "learning_rate": 7.465886046001519e-07, "loss": 0.0482, "step": 9111 }, { "epoch": 2.49, "grad_norm": 1.5129254124410179, "learning_rate": 7.458141045757172e-07, "loss": 0.0442, "step": 9112 }, { "epoch": 2.49, "grad_norm": 1.5185101930640894, "learning_rate": 7.450399741098557e-07, "loss": 0.0433, "step": 9113 }, { "epoch": 2.49, "grad_norm": 1.3830462799794978, "learning_rate": 7.442662132698148e-07, "loss": 0.0435, "step": 9114 }, { "epoch": 2.49, "grad_norm": 1.3768448813877903, "learning_rate": 7.434928221228105e-07, "loss": 0.0417, "step": 9115 }, { "epoch": 2.49, "grad_norm": 1.6428633489229871, "learning_rate": 7.427198007360282e-07, "loss": 0.0553, "step": 9116 }, { "epoch": 2.49, "grad_norm": 1.2559999935875805, "learning_rate": 7.419471491766173e-07, "loss": 0.0392, "step": 9117 }, { "epoch": 2.49, "grad_norm": 1.7471362532604113, "learning_rate": 7.411748675117008e-07, "loss": 0.0592, "step": 9118 }, { "epoch": 2.49, "grad_norm": 1.5774018878719787, "learning_rate": 7.404029558083653e-07, "loss": 0.0592, "step": 9119 }, { "epoch": 2.49, "grad_norm": 1.4492076342546396, "learning_rate": 7.396314141336652e-07, "loss": 0.0456, "step": 9120 }, { "epoch": 2.49, "grad_norm": 1.4668121524540976, "learning_rate": 7.388602425546237e-07, "loss": 0.0508, "step": 9121 }, { "epoch": 2.49, "grad_norm": 1.7264432790167168, "learning_rate": 7.380894411382339e-07, "loss": 0.0565, "step": 9122 }, { "epoch": 2.49, "grad_norm": 1.3763246314609863, "learning_rate": 7.373190099514521e-07, "loss": 0.0513, "step": 9123 }, { "epoch": 2.49, "grad_norm": 1.5509895088181258, "learning_rate": 7.365489490612083e-07, "loss": 0.0462, "step": 9124 }, { "epoch": 2.49, "grad_norm": 1.4387997706783684, "learning_rate": 7.357792585343959e-07, "loss": 0.0483, "step": 9125 }, { "epoch": 2.49, "grad_norm": 1.116156077270865, "learning_rate": 7.350099384378773e-07, "loss": 0.0336, "step": 9126 }, { "epoch": 2.49, "grad_norm": 1.4659906791550648, "learning_rate": 7.342409888384816e-07, "loss": 0.0422, "step": 9127 }, { "epoch": 2.49, "grad_norm": 1.252971903235181, "learning_rate": 7.334724098030094e-07, "loss": 0.0415, "step": 9128 }, { "epoch": 2.49, "grad_norm": 2.0192814914859376, "learning_rate": 7.32704201398225e-07, "loss": 0.0664, "step": 9129 }, { "epoch": 2.49, "grad_norm": 1.4548849854891277, "learning_rate": 7.319363636908633e-07, "loss": 0.0382, "step": 9130 }, { "epoch": 2.49, "grad_norm": 1.3015757993164256, "learning_rate": 7.311688967476255e-07, "loss": 0.0427, "step": 9131 }, { "epoch": 2.49, "grad_norm": 1.3161088599106257, "learning_rate": 7.30401800635181e-07, "loss": 0.0394, "step": 9132 }, { "epoch": 2.49, "grad_norm": 1.3149310551921567, "learning_rate": 7.296350754201653e-07, "loss": 0.0433, "step": 9133 }, { "epoch": 2.49, "grad_norm": 1.5344309965861216, "learning_rate": 7.288687211691864e-07, "loss": 0.0463, "step": 9134 }, { "epoch": 2.49, "grad_norm": 1.4192949624323892, "learning_rate": 7.281027379488143e-07, "loss": 0.0415, "step": 9135 }, { "epoch": 2.49, "grad_norm": 1.628533774123234, "learning_rate": 7.273371258255923e-07, "loss": 0.0528, "step": 9136 }, { "epoch": 2.49, "grad_norm": 1.435250830705924, "learning_rate": 7.26571884866027e-07, "loss": 0.0342, "step": 9137 }, { "epoch": 2.49, "grad_norm": 1.6009934184279602, "learning_rate": 7.258070151365931e-07, "loss": 0.0489, "step": 9138 }, { "epoch": 2.49, "grad_norm": 1.5112383965783929, "learning_rate": 7.250425167037367e-07, "loss": 0.0478, "step": 9139 }, { "epoch": 2.5, "grad_norm": 1.6967305849472853, "learning_rate": 7.242783896338678e-07, "loss": 0.0481, "step": 9140 }, { "epoch": 2.5, "grad_norm": 1.5222964590595602, "learning_rate": 7.235146339933674e-07, "loss": 0.0472, "step": 9141 }, { "epoch": 2.5, "grad_norm": 1.5473918720573918, "learning_rate": 7.227512498485812e-07, "loss": 0.0449, "step": 9142 }, { "epoch": 2.5, "grad_norm": 1.3845105852340565, "learning_rate": 7.219882372658237e-07, "loss": 0.0425, "step": 9143 }, { "epoch": 2.5, "grad_norm": 1.2379720794844282, "learning_rate": 7.212255963113773e-07, "loss": 0.0405, "step": 9144 }, { "epoch": 2.5, "grad_norm": 1.7886078232425295, "learning_rate": 7.204633270514932e-07, "loss": 0.0593, "step": 9145 }, { "epoch": 2.5, "grad_norm": 1.7484915588835566, "learning_rate": 7.197014295523879e-07, "loss": 0.0558, "step": 9146 }, { "epoch": 2.5, "grad_norm": 1.5321798563832638, "learning_rate": 7.189399038802492e-07, "loss": 0.0525, "step": 9147 }, { "epoch": 2.5, "grad_norm": 1.583366697512015, "learning_rate": 7.181787501012283e-07, "loss": 0.0478, "step": 9148 }, { "epoch": 2.5, "grad_norm": 1.6110618663636844, "learning_rate": 7.17417968281447e-07, "loss": 0.0454, "step": 9149 }, { "epoch": 2.5, "grad_norm": 1.5776490410012918, "learning_rate": 7.166575584869929e-07, "loss": 0.0458, "step": 9150 }, { "epoch": 2.5, "grad_norm": 1.2747137327793674, "learning_rate": 7.158975207839241e-07, "loss": 0.0416, "step": 9151 }, { "epoch": 2.5, "grad_norm": 1.4365939432555175, "learning_rate": 7.151378552382627e-07, "loss": 0.0485, "step": 9152 }, { "epoch": 2.5, "grad_norm": 1.6321785953028134, "learning_rate": 7.143785619160026e-07, "loss": 0.0441, "step": 9153 }, { "epoch": 2.5, "grad_norm": 1.3846858880309934, "learning_rate": 7.136196408831014e-07, "loss": 0.0423, "step": 9154 }, { "epoch": 2.5, "grad_norm": 1.6919957307652642, "learning_rate": 7.128610922054874e-07, "loss": 0.0427, "step": 9155 }, { "epoch": 2.5, "grad_norm": 1.7780764373082132, "learning_rate": 7.121029159490533e-07, "loss": 0.0629, "step": 9156 }, { "epoch": 2.5, "grad_norm": 1.637754464819675, "learning_rate": 7.113451121796632e-07, "loss": 0.0458, "step": 9157 }, { "epoch": 2.5, "grad_norm": 1.6384577532866529, "learning_rate": 7.105876809631462e-07, "loss": 0.041, "step": 9158 }, { "epoch": 2.5, "grad_norm": 1.382256613876428, "learning_rate": 7.098306223653013e-07, "loss": 0.0366, "step": 9159 }, { "epoch": 2.5, "grad_norm": 1.4383202923067384, "learning_rate": 7.090739364518923e-07, "loss": 0.04, "step": 9160 }, { "epoch": 2.5, "grad_norm": 1.3692997221121066, "learning_rate": 7.083176232886524e-07, "loss": 0.0394, "step": 9161 }, { "epoch": 2.5, "grad_norm": 1.5784348692505208, "learning_rate": 7.075616829412806e-07, "loss": 0.041, "step": 9162 }, { "epoch": 2.5, "grad_norm": 1.4999909944735648, "learning_rate": 7.068061154754485e-07, "loss": 0.0499, "step": 9163 }, { "epoch": 2.5, "grad_norm": 1.5382113976134686, "learning_rate": 7.060509209567878e-07, "loss": 0.0365, "step": 9164 }, { "epoch": 2.5, "grad_norm": 1.5916825069678298, "learning_rate": 7.052960994509056e-07, "loss": 0.0445, "step": 9165 }, { "epoch": 2.5, "grad_norm": 1.3037641116882095, "learning_rate": 7.045416510233705e-07, "loss": 0.0349, "step": 9166 }, { "epoch": 2.5, "grad_norm": 1.6036895352373282, "learning_rate": 7.037875757397211e-07, "loss": 0.0461, "step": 9167 }, { "epoch": 2.5, "grad_norm": 1.6689980251471799, "learning_rate": 7.030338736654629e-07, "loss": 0.0381, "step": 9168 }, { "epoch": 2.5, "grad_norm": 1.7461677607971402, "learning_rate": 7.022805448660719e-07, "loss": 0.0538, "step": 9169 }, { "epoch": 2.5, "grad_norm": 1.5844878706328465, "learning_rate": 7.015275894069862e-07, "loss": 0.0526, "step": 9170 }, { "epoch": 2.5, "grad_norm": 1.5224405119550164, "learning_rate": 7.007750073536179e-07, "loss": 0.043, "step": 9171 }, { "epoch": 2.5, "grad_norm": 1.5387973710966365, "learning_rate": 7.000227987713415e-07, "loss": 0.0445, "step": 9172 }, { "epoch": 2.5, "grad_norm": 1.5559841441470788, "learning_rate": 6.992709637255007e-07, "loss": 0.0391, "step": 9173 }, { "epoch": 2.5, "grad_norm": 1.6610742721382772, "learning_rate": 6.985195022814068e-07, "loss": 0.0472, "step": 9174 }, { "epoch": 2.5, "grad_norm": 1.5973456885303319, "learning_rate": 6.9776841450434e-07, "loss": 0.0461, "step": 9175 }, { "epoch": 2.51, "grad_norm": 1.4824059611412461, "learning_rate": 6.970177004595452e-07, "loss": 0.0458, "step": 9176 }, { "epoch": 2.51, "grad_norm": 1.7082853378616765, "learning_rate": 6.962673602122388e-07, "loss": 0.0515, "step": 9177 }, { "epoch": 2.51, "grad_norm": 1.196760984309167, "learning_rate": 6.955173938276011e-07, "loss": 0.0372, "step": 9178 }, { "epoch": 2.51, "grad_norm": 1.5315850464093408, "learning_rate": 6.947678013707809e-07, "loss": 0.0453, "step": 9179 }, { "epoch": 2.51, "grad_norm": 1.5257737735715897, "learning_rate": 6.940185829068946e-07, "loss": 0.0516, "step": 9180 }, { "epoch": 2.51, "grad_norm": 1.2746727349338245, "learning_rate": 6.932697385010273e-07, "loss": 0.0348, "step": 9181 }, { "epoch": 2.51, "grad_norm": 1.4676712665266491, "learning_rate": 6.925212682182298e-07, "loss": 0.0389, "step": 9182 }, { "epoch": 2.51, "grad_norm": 1.382390721292978, "learning_rate": 6.917731721235227e-07, "loss": 0.0341, "step": 9183 }, { "epoch": 2.51, "grad_norm": 1.7156762319224332, "learning_rate": 6.910254502818914e-07, "loss": 0.0501, "step": 9184 }, { "epoch": 2.51, "grad_norm": 1.5315022220843613, "learning_rate": 6.902781027582905e-07, "loss": 0.0474, "step": 9185 }, { "epoch": 2.51, "grad_norm": 1.3430577575572498, "learning_rate": 6.895311296176404e-07, "loss": 0.0448, "step": 9186 }, { "epoch": 2.51, "grad_norm": 1.7919947656392654, "learning_rate": 6.887845309248326e-07, "loss": 0.046, "step": 9187 }, { "epoch": 2.51, "grad_norm": 1.5762793180089436, "learning_rate": 6.880383067447211e-07, "loss": 0.0524, "step": 9188 }, { "epoch": 2.51, "grad_norm": 1.7235385464822204, "learning_rate": 6.872924571421318e-07, "loss": 0.0476, "step": 9189 }, { "epoch": 2.51, "grad_norm": 1.7512384186112453, "learning_rate": 6.865469821818566e-07, "loss": 0.0474, "step": 9190 }, { "epoch": 2.51, "grad_norm": 1.367431366014104, "learning_rate": 6.858018819286527e-07, "loss": 0.0421, "step": 9191 }, { "epoch": 2.51, "grad_norm": 1.482470364816719, "learning_rate": 6.850571564472463e-07, "loss": 0.0461, "step": 9192 }, { "epoch": 2.51, "grad_norm": 1.3204672103503778, "learning_rate": 6.843128058023335e-07, "loss": 0.0392, "step": 9193 }, { "epoch": 2.51, "grad_norm": 1.4637631717892052, "learning_rate": 6.835688300585735e-07, "loss": 0.0403, "step": 9194 }, { "epoch": 2.51, "grad_norm": 1.8680475887151076, "learning_rate": 6.828252292805965e-07, "loss": 0.0481, "step": 9195 }, { "epoch": 2.51, "grad_norm": 1.572716331843182, "learning_rate": 6.820820035329984e-07, "loss": 0.0491, "step": 9196 }, { "epoch": 2.51, "grad_norm": 1.2657270178321114, "learning_rate": 6.813391528803426e-07, "loss": 0.0436, "step": 9197 }, { "epoch": 2.51, "grad_norm": 1.5149542145923707, "learning_rate": 6.80596677387158e-07, "loss": 0.05, "step": 9198 }, { "epoch": 2.51, "grad_norm": 1.3473478657768, "learning_rate": 6.798545771179466e-07, "loss": 0.039, "step": 9199 }, { "epoch": 2.51, "grad_norm": 1.391550161682129, "learning_rate": 6.791128521371715e-07, "loss": 0.041, "step": 9200 }, { "epoch": 2.51, "grad_norm": 1.325188866237484, "learning_rate": 6.783715025092674e-07, "loss": 0.0434, "step": 9201 }, { "epoch": 2.51, "grad_norm": 1.238757577990556, "learning_rate": 6.776305282986346e-07, "loss": 0.0388, "step": 9202 }, { "epoch": 2.51, "grad_norm": 1.6936175802651203, "learning_rate": 6.768899295696413e-07, "loss": 0.0486, "step": 9203 }, { "epoch": 2.51, "grad_norm": 1.5645034524621013, "learning_rate": 6.761497063866207e-07, "loss": 0.0512, "step": 9204 }, { "epoch": 2.51, "grad_norm": 1.5128111396100308, "learning_rate": 6.754098588138791e-07, "loss": 0.0398, "step": 9205 }, { "epoch": 2.51, "grad_norm": 1.55964305812144, "learning_rate": 6.746703869156829e-07, "loss": 0.0531, "step": 9206 }, { "epoch": 2.51, "grad_norm": 1.865922968543829, "learning_rate": 6.739312907562734e-07, "loss": 0.0549, "step": 9207 }, { "epoch": 2.51, "grad_norm": 1.730120597173305, "learning_rate": 6.731925703998526e-07, "loss": 0.0452, "step": 9208 }, { "epoch": 2.51, "grad_norm": 1.526950796569622, "learning_rate": 6.724542259105943e-07, "loss": 0.0418, "step": 9209 }, { "epoch": 2.51, "grad_norm": 1.6833136013791101, "learning_rate": 6.717162573526359e-07, "loss": 0.0512, "step": 9210 }, { "epoch": 2.51, "grad_norm": 1.676693408619882, "learning_rate": 6.709786647900874e-07, "loss": 0.0501, "step": 9211 }, { "epoch": 2.51, "grad_norm": 1.3358527261887407, "learning_rate": 6.702414482870195e-07, "loss": 0.0401, "step": 9212 }, { "epoch": 2.52, "grad_norm": 1.445444937305827, "learning_rate": 6.695046079074774e-07, "loss": 0.0385, "step": 9213 }, { "epoch": 2.52, "grad_norm": 1.383730121091557, "learning_rate": 6.687681437154681e-07, "loss": 0.0418, "step": 9214 }, { "epoch": 2.52, "grad_norm": 1.4009520205237163, "learning_rate": 6.680320557749675e-07, "loss": 0.0444, "step": 9215 }, { "epoch": 2.52, "grad_norm": 1.4730571523622686, "learning_rate": 6.672963441499186e-07, "loss": 0.0496, "step": 9216 }, { "epoch": 2.52, "grad_norm": 1.4193557421202503, "learning_rate": 6.66561008904234e-07, "loss": 0.0416, "step": 9217 }, { "epoch": 2.52, "grad_norm": 1.4044115948524016, "learning_rate": 6.658260501017905e-07, "loss": 0.0412, "step": 9218 }, { "epoch": 2.52, "grad_norm": 1.7636711477410567, "learning_rate": 6.650914678064346e-07, "loss": 0.0557, "step": 9219 }, { "epoch": 2.52, "grad_norm": 1.3283509774025268, "learning_rate": 6.643572620819783e-07, "loss": 0.0301, "step": 9220 }, { "epoch": 2.52, "grad_norm": 1.4811753756869708, "learning_rate": 6.63623432992202e-07, "loss": 0.0462, "step": 9221 }, { "epoch": 2.52, "grad_norm": 1.2914863031586483, "learning_rate": 6.628899806008515e-07, "loss": 0.0432, "step": 9222 }, { "epoch": 2.52, "grad_norm": 1.6641015925117764, "learning_rate": 6.621569049716442e-07, "loss": 0.0557, "step": 9223 }, { "epoch": 2.52, "grad_norm": 1.4158388268219357, "learning_rate": 6.614242061682585e-07, "loss": 0.042, "step": 9224 }, { "epoch": 2.52, "grad_norm": 1.4025117825778293, "learning_rate": 6.606918842543481e-07, "loss": 0.0355, "step": 9225 }, { "epoch": 2.52, "grad_norm": 1.5946008237703155, "learning_rate": 6.599599392935241e-07, "loss": 0.0461, "step": 9226 }, { "epoch": 2.52, "grad_norm": 1.6414571574034247, "learning_rate": 6.592283713493741e-07, "loss": 0.0549, "step": 9227 }, { "epoch": 2.52, "grad_norm": 1.5732034354358095, "learning_rate": 6.584971804854457e-07, "loss": 0.0515, "step": 9228 }, { "epoch": 2.52, "grad_norm": 1.8049634188996078, "learning_rate": 6.577663667652595e-07, "loss": 0.0586, "step": 9229 }, { "epoch": 2.52, "grad_norm": 1.7082091171672416, "learning_rate": 6.570359302523011e-07, "loss": 0.0478, "step": 9230 }, { "epoch": 2.52, "grad_norm": 1.5798190716279514, "learning_rate": 6.563058710100218e-07, "loss": 0.0521, "step": 9231 }, { "epoch": 2.52, "grad_norm": 1.6792697881846939, "learning_rate": 6.55576189101842e-07, "loss": 0.0522, "step": 9232 }, { "epoch": 2.52, "grad_norm": 1.38889217632605, "learning_rate": 6.548468845911471e-07, "loss": 0.0417, "step": 9233 }, { "epoch": 2.52, "grad_norm": 1.4232526639331131, "learning_rate": 6.541179575412942e-07, "loss": 0.0457, "step": 9234 }, { "epoch": 2.52, "grad_norm": 1.6222448425794103, "learning_rate": 6.533894080156017e-07, "loss": 0.0463, "step": 9235 }, { "epoch": 2.52, "grad_norm": 1.5032123485024722, "learning_rate": 6.526612360773615e-07, "loss": 0.0408, "step": 9236 }, { "epoch": 2.52, "grad_norm": 1.4639932929462964, "learning_rate": 6.519334417898277e-07, "loss": 0.0421, "step": 9237 }, { "epoch": 2.52, "grad_norm": 1.4222474806463148, "learning_rate": 6.512060252162228e-07, "loss": 0.0444, "step": 9238 }, { "epoch": 2.52, "grad_norm": 1.5655759177579105, "learning_rate": 6.504789864197375e-07, "loss": 0.0552, "step": 9239 }, { "epoch": 2.52, "grad_norm": 1.3414952635130217, "learning_rate": 6.497523254635296e-07, "loss": 0.0424, "step": 9240 }, { "epoch": 2.52, "grad_norm": 1.3160366227969231, "learning_rate": 6.490260424107231e-07, "loss": 0.0371, "step": 9241 }, { "epoch": 2.52, "grad_norm": 1.4885130425785391, "learning_rate": 6.483001373244107e-07, "loss": 0.0459, "step": 9242 }, { "epoch": 2.52, "grad_norm": 1.8104844383498992, "learning_rate": 6.475746102676517e-07, "loss": 0.0512, "step": 9243 }, { "epoch": 2.52, "grad_norm": 1.7218507461531034, "learning_rate": 6.468494613034704e-07, "loss": 0.0526, "step": 9244 }, { "epoch": 2.52, "grad_norm": 1.6974153564100232, "learning_rate": 6.461246904948604e-07, "loss": 0.0516, "step": 9245 }, { "epoch": 2.52, "grad_norm": 1.362255119743437, "learning_rate": 6.454002979047836e-07, "loss": 0.0382, "step": 9246 }, { "epoch": 2.52, "grad_norm": 1.3405947808462468, "learning_rate": 6.446762835961656e-07, "loss": 0.0438, "step": 9247 }, { "epoch": 2.52, "grad_norm": 1.6080820401130123, "learning_rate": 6.439526476319031e-07, "loss": 0.0419, "step": 9248 }, { "epoch": 2.52, "grad_norm": 1.6322231813534775, "learning_rate": 6.432293900748571e-07, "loss": 0.0423, "step": 9249 }, { "epoch": 2.53, "grad_norm": 1.7618590367801918, "learning_rate": 6.425065109878559e-07, "loss": 0.0541, "step": 9250 }, { "epoch": 2.53, "grad_norm": 1.5443724106379078, "learning_rate": 6.417840104336953e-07, "loss": 0.0424, "step": 9251 }, { "epoch": 2.53, "grad_norm": 1.361147940565434, "learning_rate": 6.410618884751407e-07, "loss": 0.036, "step": 9252 }, { "epoch": 2.53, "grad_norm": 1.6279273750122605, "learning_rate": 6.403401451749197e-07, "loss": 0.0541, "step": 9253 }, { "epoch": 2.53, "grad_norm": 1.4105709248009561, "learning_rate": 6.396187805957315e-07, "loss": 0.0406, "step": 9254 }, { "epoch": 2.53, "grad_norm": 1.4051043991359757, "learning_rate": 6.388977948002406e-07, "loss": 0.0413, "step": 9255 }, { "epoch": 2.53, "grad_norm": 1.4966326023433527, "learning_rate": 6.381771878510779e-07, "loss": 0.0502, "step": 9256 }, { "epoch": 2.53, "grad_norm": 1.1587844564237084, "learning_rate": 6.37456959810841e-07, "loss": 0.031, "step": 9257 }, { "epoch": 2.53, "grad_norm": 1.73880482175389, "learning_rate": 6.367371107420983e-07, "loss": 0.0542, "step": 9258 }, { "epoch": 2.53, "grad_norm": 1.355296823024841, "learning_rate": 6.360176407073798e-07, "loss": 0.0446, "step": 9259 }, { "epoch": 2.53, "grad_norm": 1.3758192502550173, "learning_rate": 6.352985497691883e-07, "loss": 0.042, "step": 9260 }, { "epoch": 2.53, "grad_norm": 1.470427061121933, "learning_rate": 6.345798379899898e-07, "loss": 0.0453, "step": 9261 }, { "epoch": 2.53, "grad_norm": 1.4650421763447183, "learning_rate": 6.338615054322173e-07, "loss": 0.0513, "step": 9262 }, { "epoch": 2.53, "grad_norm": 1.3292632706227707, "learning_rate": 6.331435521582718e-07, "loss": 0.0362, "step": 9263 }, { "epoch": 2.53, "grad_norm": 1.5117734241372645, "learning_rate": 6.324259782305237e-07, "loss": 0.041, "step": 9264 }, { "epoch": 2.53, "grad_norm": 1.3517403952211193, "learning_rate": 6.31708783711305e-07, "loss": 0.0404, "step": 9265 }, { "epoch": 2.53, "grad_norm": 1.4673009304547475, "learning_rate": 6.309919686629212e-07, "loss": 0.0468, "step": 9266 }, { "epoch": 2.53, "grad_norm": 1.3759096290853892, "learning_rate": 6.302755331476401e-07, "loss": 0.0435, "step": 9267 }, { "epoch": 2.53, "grad_norm": 1.4391997785213244, "learning_rate": 6.295594772276981e-07, "loss": 0.0444, "step": 9268 }, { "epoch": 2.53, "grad_norm": 1.436912540357196, "learning_rate": 6.288438009652969e-07, "loss": 0.0493, "step": 9269 }, { "epoch": 2.53, "grad_norm": 1.6853775413334768, "learning_rate": 6.281285044226104e-07, "loss": 0.054, "step": 9270 }, { "epoch": 2.53, "grad_norm": 1.3539939606525964, "learning_rate": 6.274135876617726e-07, "loss": 0.0408, "step": 9271 }, { "epoch": 2.53, "grad_norm": 1.1652654143620704, "learning_rate": 6.2669905074489e-07, "loss": 0.0334, "step": 9272 }, { "epoch": 2.53, "grad_norm": 1.6040188450962098, "learning_rate": 6.25984893734034e-07, "loss": 0.0532, "step": 9273 }, { "epoch": 2.53, "grad_norm": 1.6332789250665911, "learning_rate": 6.252711166912418e-07, "loss": 0.0524, "step": 9274 }, { "epoch": 2.53, "grad_norm": 1.2154998608562746, "learning_rate": 6.245577196785186e-07, "loss": 0.0335, "step": 9275 }, { "epoch": 2.53, "grad_norm": 1.6350521779976859, "learning_rate": 6.238447027578387e-07, "loss": 0.0418, "step": 9276 }, { "epoch": 2.53, "grad_norm": 1.615776139702605, "learning_rate": 6.231320659911388e-07, "loss": 0.0486, "step": 9277 }, { "epoch": 2.53, "grad_norm": 1.312790196202859, "learning_rate": 6.224198094403278e-07, "loss": 0.046, "step": 9278 }, { "epoch": 2.53, "grad_norm": 1.347429532863126, "learning_rate": 6.217079331672777e-07, "loss": 0.0373, "step": 9279 }, { "epoch": 2.53, "grad_norm": 1.425931296115451, "learning_rate": 6.20996437233829e-07, "loss": 0.043, "step": 9280 }, { "epoch": 2.53, "grad_norm": 1.841467047033049, "learning_rate": 6.202853217017879e-07, "loss": 0.0569, "step": 9281 }, { "epoch": 2.53, "grad_norm": 1.3220236107840497, "learning_rate": 6.195745866329305e-07, "loss": 0.0416, "step": 9282 }, { "epoch": 2.53, "grad_norm": 1.707855717149118, "learning_rate": 6.188642320889959e-07, "loss": 0.0548, "step": 9283 }, { "epoch": 2.53, "grad_norm": 1.3117725998502034, "learning_rate": 6.181542581316941e-07, "loss": 0.0409, "step": 9284 }, { "epoch": 2.53, "grad_norm": 1.1135003004366497, "learning_rate": 6.174446648226995e-07, "loss": 0.0359, "step": 9285 }, { "epoch": 2.54, "grad_norm": 1.2542941203560147, "learning_rate": 6.167354522236535e-07, "loss": 0.0377, "step": 9286 }, { "epoch": 2.54, "grad_norm": 1.525791939327361, "learning_rate": 6.160266203961645e-07, "loss": 0.0381, "step": 9287 }, { "epoch": 2.54, "grad_norm": 1.4542004719795047, "learning_rate": 6.153181694018101e-07, "loss": 0.0391, "step": 9288 }, { "epoch": 2.54, "grad_norm": 1.4369041304255807, "learning_rate": 6.146100993021308e-07, "loss": 0.0457, "step": 9289 }, { "epoch": 2.54, "grad_norm": 1.5730652059783, "learning_rate": 6.139024101586383e-07, "loss": 0.051, "step": 9290 }, { "epoch": 2.54, "grad_norm": 1.5706678971258587, "learning_rate": 6.131951020328081e-07, "loss": 0.0477, "step": 9291 }, { "epoch": 2.54, "grad_norm": 1.4507090190316347, "learning_rate": 6.124881749860839e-07, "loss": 0.0513, "step": 9292 }, { "epoch": 2.54, "grad_norm": 1.5157359426230064, "learning_rate": 6.117816290798751e-07, "loss": 0.0461, "step": 9293 }, { "epoch": 2.54, "grad_norm": 1.4491017462610927, "learning_rate": 6.110754643755606e-07, "loss": 0.0537, "step": 9294 }, { "epoch": 2.54, "grad_norm": 1.511588796433832, "learning_rate": 6.103696809344823e-07, "loss": 0.049, "step": 9295 }, { "epoch": 2.54, "grad_norm": 1.6418665109222186, "learning_rate": 6.096642788179535e-07, "loss": 0.0449, "step": 9296 }, { "epoch": 2.54, "grad_norm": 1.5615796065450795, "learning_rate": 6.089592580872511e-07, "loss": 0.0483, "step": 9297 }, { "epoch": 2.54, "grad_norm": 1.646715306849953, "learning_rate": 6.082546188036204e-07, "loss": 0.043, "step": 9298 }, { "epoch": 2.54, "grad_norm": 1.409845531805855, "learning_rate": 6.075503610282707e-07, "loss": 0.0455, "step": 9299 }, { "epoch": 2.54, "grad_norm": 1.5084798445332748, "learning_rate": 6.068464848223831e-07, "loss": 0.0482, "step": 9300 }, { "epoch": 2.54, "grad_norm": 1.5154741647755088, "learning_rate": 6.061429902471011e-07, "loss": 0.0366, "step": 9301 }, { "epoch": 2.54, "grad_norm": 1.7248905250114255, "learning_rate": 6.054398773635395e-07, "loss": 0.0518, "step": 9302 }, { "epoch": 2.54, "grad_norm": 1.3060936472282296, "learning_rate": 6.047371462327733e-07, "loss": 0.0389, "step": 9303 }, { "epoch": 2.54, "grad_norm": 1.3389706740125993, "learning_rate": 6.040347969158517e-07, "loss": 0.0397, "step": 9304 }, { "epoch": 2.54, "grad_norm": 1.4878187323089656, "learning_rate": 6.03332829473785e-07, "loss": 0.0489, "step": 9305 }, { "epoch": 2.54, "grad_norm": 1.5757453983418181, "learning_rate": 6.026312439675553e-07, "loss": 0.0507, "step": 9306 }, { "epoch": 2.54, "grad_norm": 1.4519569305653428, "learning_rate": 6.019300404581057e-07, "loss": 0.0466, "step": 9307 }, { "epoch": 2.54, "grad_norm": 1.4549603392332116, "learning_rate": 6.012292190063535e-07, "loss": 0.0432, "step": 9308 }, { "epoch": 2.54, "grad_norm": 1.3277910538830735, "learning_rate": 6.005287796731746e-07, "loss": 0.0408, "step": 9309 }, { "epoch": 2.54, "grad_norm": 1.5773560520113425, "learning_rate": 5.998287225194177e-07, "loss": 0.0548, "step": 9310 }, { "epoch": 2.54, "grad_norm": 1.3295425083017247, "learning_rate": 5.991290476058953e-07, "loss": 0.0442, "step": 9311 }, { "epoch": 2.54, "grad_norm": 1.331608052580573, "learning_rate": 5.984297549933893e-07, "loss": 0.0427, "step": 9312 }, { "epoch": 2.54, "grad_norm": 1.497564092423009, "learning_rate": 5.97730844742645e-07, "loss": 0.0468, "step": 9313 }, { "epoch": 2.54, "grad_norm": 1.6906800811012426, "learning_rate": 5.970323169143793e-07, "loss": 0.0522, "step": 9314 }, { "epoch": 2.54, "grad_norm": 1.4527454842122562, "learning_rate": 5.963341715692689e-07, "loss": 0.0469, "step": 9315 }, { "epoch": 2.54, "grad_norm": 1.4022621882997055, "learning_rate": 5.956364087679644e-07, "loss": 0.0436, "step": 9316 }, { "epoch": 2.54, "grad_norm": 1.3522228791772575, "learning_rate": 5.949390285710777e-07, "loss": 0.0459, "step": 9317 }, { "epoch": 2.54, "grad_norm": 1.5420115392691391, "learning_rate": 5.942420310391916e-07, "loss": 0.0456, "step": 9318 }, { "epoch": 2.54, "grad_norm": 1.2810661070643923, "learning_rate": 5.93545416232853e-07, "loss": 0.0406, "step": 9319 }, { "epoch": 2.54, "grad_norm": 1.6907657552588, "learning_rate": 5.928491842125783e-07, "loss": 0.0418, "step": 9320 }, { "epoch": 2.54, "grad_norm": 1.4718772456581073, "learning_rate": 5.921533350388448e-07, "loss": 0.042, "step": 9321 }, { "epoch": 2.54, "grad_norm": 1.2634409351652187, "learning_rate": 5.914578687721034e-07, "loss": 0.0333, "step": 9322 }, { "epoch": 2.55, "grad_norm": 1.355904576735825, "learning_rate": 5.907627854727688e-07, "loss": 0.0447, "step": 9323 }, { "epoch": 2.55, "grad_norm": 1.2270687372438003, "learning_rate": 5.900680852012209e-07, "loss": 0.0379, "step": 9324 }, { "epoch": 2.55, "grad_norm": 1.7310683579335753, "learning_rate": 5.893737680178102e-07, "loss": 0.0478, "step": 9325 }, { "epoch": 2.55, "grad_norm": 1.3703101561172066, "learning_rate": 5.886798339828498e-07, "loss": 0.0417, "step": 9326 }, { "epoch": 2.55, "grad_norm": 1.5655299570881718, "learning_rate": 5.879862831566225e-07, "loss": 0.0449, "step": 9327 }, { "epoch": 2.55, "grad_norm": 1.6846351679118978, "learning_rate": 5.872931155993744e-07, "loss": 0.0409, "step": 9328 }, { "epoch": 2.55, "grad_norm": 1.475778780504766, "learning_rate": 5.866003313713231e-07, "loss": 0.0452, "step": 9329 }, { "epoch": 2.55, "grad_norm": 1.3535298060701344, "learning_rate": 5.859079305326487e-07, "loss": 0.04, "step": 9330 }, { "epoch": 2.55, "grad_norm": 1.4156496614101144, "learning_rate": 5.852159131435015e-07, "loss": 0.045, "step": 9331 }, { "epoch": 2.55, "grad_norm": 1.4696627664881157, "learning_rate": 5.845242792639955e-07, "loss": 0.0459, "step": 9332 }, { "epoch": 2.55, "grad_norm": 1.5132987026955178, "learning_rate": 5.838330289542121e-07, "loss": 0.0421, "step": 9333 }, { "epoch": 2.55, "grad_norm": 1.5638255493013575, "learning_rate": 5.831421622741995e-07, "loss": 0.0485, "step": 9334 }, { "epoch": 2.55, "grad_norm": 1.4464948469440047, "learning_rate": 5.82451679283974e-07, "loss": 0.0426, "step": 9335 }, { "epoch": 2.55, "grad_norm": 1.6653777623951753, "learning_rate": 5.817615800435167e-07, "loss": 0.0482, "step": 9336 }, { "epoch": 2.55, "grad_norm": 1.5987814745554494, "learning_rate": 5.810718646127772e-07, "loss": 0.0471, "step": 9337 }, { "epoch": 2.55, "grad_norm": 1.6159677273389355, "learning_rate": 5.803825330516699e-07, "loss": 0.0415, "step": 9338 }, { "epoch": 2.55, "grad_norm": 1.7725605347204223, "learning_rate": 5.796935854200764e-07, "loss": 0.0486, "step": 9339 }, { "epoch": 2.55, "grad_norm": 1.2116651530771918, "learning_rate": 5.790050217778442e-07, "loss": 0.0365, "step": 9340 }, { "epoch": 2.55, "grad_norm": 1.3366514082309722, "learning_rate": 5.783168421847912e-07, "loss": 0.0392, "step": 9341 }, { "epoch": 2.55, "grad_norm": 1.5357472584251142, "learning_rate": 5.776290467006961e-07, "loss": 0.0441, "step": 9342 }, { "epoch": 2.55, "grad_norm": 1.4931475573569182, "learning_rate": 5.769416353853097e-07, "loss": 0.0431, "step": 9343 }, { "epoch": 2.55, "grad_norm": 1.5973699024862298, "learning_rate": 5.762546082983462e-07, "loss": 0.0427, "step": 9344 }, { "epoch": 2.55, "grad_norm": 1.4727593058720514, "learning_rate": 5.755679654994866e-07, "loss": 0.0371, "step": 9345 }, { "epoch": 2.55, "grad_norm": 1.3102635345845686, "learning_rate": 5.748817070483792e-07, "loss": 0.0352, "step": 9346 }, { "epoch": 2.55, "grad_norm": 1.5098352210174155, "learning_rate": 5.741958330046399e-07, "loss": 0.0448, "step": 9347 }, { "epoch": 2.55, "grad_norm": 1.7112577625926164, "learning_rate": 5.735103434278482e-07, "loss": 0.058, "step": 9348 }, { "epoch": 2.55, "grad_norm": 1.3312174687743756, "learning_rate": 5.728252383775551e-07, "loss": 0.037, "step": 9349 }, { "epoch": 2.55, "grad_norm": 1.6262182635753129, "learning_rate": 5.721405179132733e-07, "loss": 0.0478, "step": 9350 }, { "epoch": 2.55, "grad_norm": 1.5288705711280295, "learning_rate": 5.714561820944848e-07, "loss": 0.0498, "step": 9351 }, { "epoch": 2.55, "grad_norm": 1.3130130647280651, "learning_rate": 5.707722309806352e-07, "loss": 0.0398, "step": 9352 }, { "epoch": 2.55, "grad_norm": 1.5272330078855576, "learning_rate": 5.700886646311427e-07, "loss": 0.0489, "step": 9353 }, { "epoch": 2.55, "grad_norm": 1.8284411040755089, "learning_rate": 5.694054831053847e-07, "loss": 0.0469, "step": 9354 }, { "epoch": 2.55, "grad_norm": 1.2921777293746586, "learning_rate": 5.687226864627115e-07, "loss": 0.0401, "step": 9355 }, { "epoch": 2.55, "grad_norm": 1.5249134050999804, "learning_rate": 5.680402747624364e-07, "loss": 0.0462, "step": 9356 }, { "epoch": 2.55, "grad_norm": 1.5415871212250776, "learning_rate": 5.673582480638395e-07, "loss": 0.0486, "step": 9357 }, { "epoch": 2.55, "grad_norm": 1.4828366130767072, "learning_rate": 5.666766064261681e-07, "loss": 0.0495, "step": 9358 }, { "epoch": 2.56, "grad_norm": 1.7910842337444195, "learning_rate": 5.659953499086368e-07, "loss": 0.0488, "step": 9359 }, { "epoch": 2.56, "grad_norm": 1.3899166506067544, "learning_rate": 5.653144785704245e-07, "loss": 0.0422, "step": 9360 }, { "epoch": 2.56, "grad_norm": 1.8409343123085, "learning_rate": 5.6463399247068e-07, "loss": 0.055, "step": 9361 }, { "epoch": 2.56, "grad_norm": 1.7177966708246812, "learning_rate": 5.639538916685161e-07, "loss": 0.0448, "step": 9362 }, { "epoch": 2.56, "grad_norm": 1.5371485866944967, "learning_rate": 5.63274176223012e-07, "loss": 0.0453, "step": 9363 }, { "epoch": 2.56, "grad_norm": 1.500343283868266, "learning_rate": 5.625948461932135e-07, "loss": 0.0503, "step": 9364 }, { "epoch": 2.56, "grad_norm": 1.3823709689841568, "learning_rate": 5.619159016381359e-07, "loss": 0.0418, "step": 9365 }, { "epoch": 2.56, "grad_norm": 1.3323515608385956, "learning_rate": 5.612373426167566e-07, "loss": 0.0393, "step": 9366 }, { "epoch": 2.56, "grad_norm": 1.358664271691672, "learning_rate": 5.60559169188023e-07, "loss": 0.0488, "step": 9367 }, { "epoch": 2.56, "grad_norm": 1.3337144003115673, "learning_rate": 5.598813814108478e-07, "loss": 0.0423, "step": 9368 }, { "epoch": 2.56, "grad_norm": 1.307399947489894, "learning_rate": 5.59203979344109e-07, "loss": 0.0391, "step": 9369 }, { "epoch": 2.56, "grad_norm": 1.3910824184274528, "learning_rate": 5.585269630466511e-07, "loss": 0.045, "step": 9370 }, { "epoch": 2.56, "grad_norm": 1.6143070674202626, "learning_rate": 5.578503325772889e-07, "loss": 0.048, "step": 9371 }, { "epoch": 2.56, "grad_norm": 1.5642765953222224, "learning_rate": 5.571740879947979e-07, "loss": 0.0484, "step": 9372 }, { "epoch": 2.56, "grad_norm": 1.5730907122242064, "learning_rate": 5.564982293579258e-07, "loss": 0.0479, "step": 9373 }, { "epoch": 2.56, "grad_norm": 1.2930179538546491, "learning_rate": 5.558227567253832e-07, "loss": 0.0394, "step": 9374 }, { "epoch": 2.56, "grad_norm": 1.5825501312858514, "learning_rate": 5.551476701558473e-07, "loss": 0.0514, "step": 9375 }, { "epoch": 2.56, "grad_norm": 1.4867669702295414, "learning_rate": 5.544729697079615e-07, "loss": 0.0407, "step": 9376 }, { "epoch": 2.56, "grad_norm": 1.4824601417061116, "learning_rate": 5.537986554403391e-07, "loss": 0.0503, "step": 9377 }, { "epoch": 2.56, "grad_norm": 1.5718637874099572, "learning_rate": 5.531247274115553e-07, "loss": 0.0458, "step": 9378 }, { "epoch": 2.56, "grad_norm": 1.7632920201416338, "learning_rate": 5.524511856801567e-07, "loss": 0.0521, "step": 9379 }, { "epoch": 2.56, "grad_norm": 1.53203699301683, "learning_rate": 5.517780303046494e-07, "loss": 0.0427, "step": 9380 }, { "epoch": 2.56, "grad_norm": 1.4663945088993786, "learning_rate": 5.511052613435131e-07, "loss": 0.0435, "step": 9381 }, { "epoch": 2.56, "grad_norm": 1.4457285922026681, "learning_rate": 5.504328788551888e-07, "loss": 0.0453, "step": 9382 }, { "epoch": 2.56, "grad_norm": 1.45961542746993, "learning_rate": 5.497608828980877e-07, "loss": 0.043, "step": 9383 }, { "epoch": 2.56, "grad_norm": 1.5230644950006442, "learning_rate": 5.490892735305842e-07, "loss": 0.0459, "step": 9384 }, { "epoch": 2.56, "grad_norm": 1.523004316136607, "learning_rate": 5.484180508110232e-07, "loss": 0.0487, "step": 9385 }, { "epoch": 2.56, "grad_norm": 1.8684622774376212, "learning_rate": 5.477472147977097e-07, "loss": 0.0538, "step": 9386 }, { "epoch": 2.56, "grad_norm": 1.4833514447720368, "learning_rate": 5.470767655489217e-07, "loss": 0.0415, "step": 9387 }, { "epoch": 2.56, "grad_norm": 1.593617356309045, "learning_rate": 5.464067031228987e-07, "loss": 0.0516, "step": 9388 }, { "epoch": 2.56, "grad_norm": 1.3943362612278334, "learning_rate": 5.457370275778506e-07, "loss": 0.0418, "step": 9389 }, { "epoch": 2.56, "grad_norm": 1.5165033016974703, "learning_rate": 5.450677389719494e-07, "loss": 0.0425, "step": 9390 }, { "epoch": 2.56, "grad_norm": 1.4436019870015415, "learning_rate": 5.443988373633397e-07, "loss": 0.0406, "step": 9391 }, { "epoch": 2.56, "grad_norm": 1.6795210602622075, "learning_rate": 5.437303228101238e-07, "loss": 0.0514, "step": 9392 }, { "epoch": 2.56, "grad_norm": 1.4252441487840701, "learning_rate": 5.430621953703785e-07, "loss": 0.0435, "step": 9393 }, { "epoch": 2.56, "grad_norm": 1.6414920293094153, "learning_rate": 5.423944551021409e-07, "loss": 0.0559, "step": 9394 }, { "epoch": 2.56, "grad_norm": 1.5078702152771486, "learning_rate": 5.417271020634207e-07, "loss": 0.0412, "step": 9395 }, { "epoch": 2.57, "grad_norm": 1.6842539671184469, "learning_rate": 5.41060136312187e-07, "loss": 0.0519, "step": 9396 }, { "epoch": 2.57, "grad_norm": 1.4328445366183313, "learning_rate": 5.403935579063824e-07, "loss": 0.0423, "step": 9397 }, { "epoch": 2.57, "grad_norm": 1.4699558178617385, "learning_rate": 5.397273669039083e-07, "loss": 0.0448, "step": 9398 }, { "epoch": 2.57, "grad_norm": 1.5151817262613314, "learning_rate": 5.390615633626384e-07, "loss": 0.0371, "step": 9399 }, { "epoch": 2.57, "grad_norm": 1.5243240678260968, "learning_rate": 5.383961473404098e-07, "loss": 0.0421, "step": 9400 }, { "epoch": 2.57, "grad_norm": 1.5187390755136203, "learning_rate": 5.377311188950279e-07, "loss": 0.0436, "step": 9401 }, { "epoch": 2.57, "grad_norm": 1.3205795879981002, "learning_rate": 5.370664780842622e-07, "loss": 0.0449, "step": 9402 }, { "epoch": 2.57, "grad_norm": 1.4413842704868112, "learning_rate": 5.364022249658519e-07, "loss": 0.0404, "step": 9403 }, { "epoch": 2.57, "grad_norm": 1.3966146927325749, "learning_rate": 5.357383595974969e-07, "loss": 0.0424, "step": 9404 }, { "epoch": 2.57, "grad_norm": 1.7454430441998872, "learning_rate": 5.350748820368689e-07, "loss": 0.0437, "step": 9405 }, { "epoch": 2.57, "grad_norm": 1.6705080782759565, "learning_rate": 5.344117923416026e-07, "loss": 0.055, "step": 9406 }, { "epoch": 2.57, "grad_norm": 2.327100049820239, "learning_rate": 5.337490905693016e-07, "loss": 0.0516, "step": 9407 }, { "epoch": 2.57, "grad_norm": 1.3836378415133572, "learning_rate": 5.330867767775333e-07, "loss": 0.0388, "step": 9408 }, { "epoch": 2.57, "grad_norm": 1.3747909813151689, "learning_rate": 5.324248510238345e-07, "loss": 0.0413, "step": 9409 }, { "epoch": 2.57, "grad_norm": 1.3015105217796543, "learning_rate": 5.317633133657029e-07, "loss": 0.0426, "step": 9410 }, { "epoch": 2.57, "grad_norm": 1.5816899056895841, "learning_rate": 5.311021638606084e-07, "loss": 0.0467, "step": 9411 }, { "epoch": 2.57, "grad_norm": 1.4776297669941274, "learning_rate": 5.304414025659832e-07, "loss": 0.0458, "step": 9412 }, { "epoch": 2.57, "grad_norm": 1.6886019805220323, "learning_rate": 5.297810295392291e-07, "loss": 0.0512, "step": 9413 }, { "epoch": 2.57, "grad_norm": 1.4024395631991442, "learning_rate": 5.291210448377099e-07, "loss": 0.038, "step": 9414 }, { "epoch": 2.57, "grad_norm": 1.496374015976713, "learning_rate": 5.284614485187606e-07, "loss": 0.0479, "step": 9415 }, { "epoch": 2.57, "grad_norm": 1.4703070558785776, "learning_rate": 5.278022406396788e-07, "loss": 0.0496, "step": 9416 }, { "epoch": 2.57, "grad_norm": 1.610789699533333, "learning_rate": 5.271434212577281e-07, "loss": 0.0492, "step": 9417 }, { "epoch": 2.57, "grad_norm": 1.4449983962305737, "learning_rate": 5.264849904301422e-07, "loss": 0.0425, "step": 9418 }, { "epoch": 2.57, "grad_norm": 1.328663111603859, "learning_rate": 5.258269482141165e-07, "loss": 0.0432, "step": 9419 }, { "epoch": 2.57, "grad_norm": 1.409689085100629, "learning_rate": 5.251692946668169e-07, "loss": 0.0476, "step": 9420 }, { "epoch": 2.57, "grad_norm": 1.5474513427162788, "learning_rate": 5.245120298453715e-07, "loss": 0.0481, "step": 9421 }, { "epoch": 2.57, "grad_norm": 1.766975422799182, "learning_rate": 5.238551538068776e-07, "loss": 0.054, "step": 9422 }, { "epoch": 2.57, "grad_norm": 1.4026953004612066, "learning_rate": 5.23198666608396e-07, "loss": 0.0454, "step": 9423 }, { "epoch": 2.57, "grad_norm": 1.295197058555472, "learning_rate": 5.22542568306958e-07, "loss": 0.0403, "step": 9424 }, { "epoch": 2.57, "grad_norm": 1.5130454643863047, "learning_rate": 5.218868589595555e-07, "loss": 0.0405, "step": 9425 }, { "epoch": 2.57, "grad_norm": 1.5970329732191668, "learning_rate": 5.21231538623152e-07, "loss": 0.0548, "step": 9426 }, { "epoch": 2.57, "grad_norm": 1.5037455687960726, "learning_rate": 5.205766073546742e-07, "loss": 0.0468, "step": 9427 }, { "epoch": 2.57, "grad_norm": 1.5931211400825729, "learning_rate": 5.199220652110148e-07, "loss": 0.0417, "step": 9428 }, { "epoch": 2.57, "grad_norm": 1.5350254098100875, "learning_rate": 5.19267912249033e-07, "loss": 0.0489, "step": 9429 }, { "epoch": 2.57, "grad_norm": 1.544681422183019, "learning_rate": 5.186141485255569e-07, "loss": 0.0436, "step": 9430 }, { "epoch": 2.57, "grad_norm": 1.5045752686857166, "learning_rate": 5.179607740973764e-07, "loss": 0.0534, "step": 9431 }, { "epoch": 2.57, "grad_norm": 1.4016989356848832, "learning_rate": 5.173077890212508e-07, "loss": 0.0449, "step": 9432 }, { "epoch": 2.58, "grad_norm": 1.3679013200555543, "learning_rate": 5.166551933539049e-07, "loss": 0.0393, "step": 9433 }, { "epoch": 2.58, "grad_norm": 1.412741537200752, "learning_rate": 5.160029871520284e-07, "loss": 0.0473, "step": 9434 }, { "epoch": 2.58, "grad_norm": 1.285126378454, "learning_rate": 5.153511704722775e-07, "loss": 0.0432, "step": 9435 }, { "epoch": 2.58, "grad_norm": 1.5473949157744449, "learning_rate": 5.146997433712769e-07, "loss": 0.0438, "step": 9436 }, { "epoch": 2.58, "grad_norm": 1.3398707130295597, "learning_rate": 5.140487059056143e-07, "loss": 0.0431, "step": 9437 }, { "epoch": 2.58, "grad_norm": 1.2907777540719125, "learning_rate": 5.133980581318459e-07, "loss": 0.0394, "step": 9438 }, { "epoch": 2.58, "grad_norm": 1.5461921453787888, "learning_rate": 5.127478001064928e-07, "loss": 0.0476, "step": 9439 }, { "epoch": 2.58, "grad_norm": 1.2466253673084657, "learning_rate": 5.120979318860419e-07, "loss": 0.0349, "step": 9440 }, { "epoch": 2.58, "grad_norm": 1.422079879613309, "learning_rate": 5.114484535269465e-07, "loss": 0.0421, "step": 9441 }, { "epoch": 2.58, "grad_norm": 1.4591550594869038, "learning_rate": 5.107993650856285e-07, "loss": 0.0406, "step": 9442 }, { "epoch": 2.58, "grad_norm": 1.4511048983545245, "learning_rate": 5.101506666184708e-07, "loss": 0.038, "step": 9443 }, { "epoch": 2.58, "grad_norm": 1.3487680519418748, "learning_rate": 5.095023581818287e-07, "loss": 0.0414, "step": 9444 }, { "epoch": 2.58, "grad_norm": 1.4867755700430791, "learning_rate": 5.088544398320189e-07, "loss": 0.0423, "step": 9445 }, { "epoch": 2.58, "grad_norm": 1.439062274577456, "learning_rate": 5.082069116253252e-07, "loss": 0.0412, "step": 9446 }, { "epoch": 2.58, "grad_norm": 1.247696186418509, "learning_rate": 5.075597736179977e-07, "loss": 0.0378, "step": 9447 }, { "epoch": 2.58, "grad_norm": 1.3988958666516418, "learning_rate": 5.069130258662541e-07, "loss": 0.0439, "step": 9448 }, { "epoch": 2.58, "grad_norm": 1.3810304775337894, "learning_rate": 5.062666684262757e-07, "loss": 0.0436, "step": 9449 }, { "epoch": 2.58, "grad_norm": 1.2471137972676791, "learning_rate": 5.056207013542131e-07, "loss": 0.0362, "step": 9450 }, { "epoch": 2.58, "grad_norm": 1.7717503129411514, "learning_rate": 5.049751247061796e-07, "loss": 0.0561, "step": 9451 }, { "epoch": 2.58, "grad_norm": 1.3781118365836218, "learning_rate": 5.04329938538256e-07, "loss": 0.0339, "step": 9452 }, { "epoch": 2.58, "grad_norm": 1.4630232072172364, "learning_rate": 5.036851429064893e-07, "loss": 0.0432, "step": 9453 }, { "epoch": 2.58, "grad_norm": 1.4949059316421671, "learning_rate": 5.030407378668939e-07, "loss": 0.039, "step": 9454 }, { "epoch": 2.58, "grad_norm": 1.5491035998803881, "learning_rate": 5.023967234754462e-07, "loss": 0.0467, "step": 9455 }, { "epoch": 2.58, "grad_norm": 1.7369319243384285, "learning_rate": 5.017530997880948e-07, "loss": 0.0429, "step": 9456 }, { "epoch": 2.58, "grad_norm": 1.2036899260130764, "learning_rate": 5.011098668607478e-07, "loss": 0.0328, "step": 9457 }, { "epoch": 2.58, "grad_norm": 1.6144115463247422, "learning_rate": 5.004670247492838e-07, "loss": 0.0465, "step": 9458 }, { "epoch": 2.58, "grad_norm": 1.6980740804591137, "learning_rate": 4.998245735095459e-07, "loss": 0.0448, "step": 9459 }, { "epoch": 2.58, "grad_norm": 1.5715629139801757, "learning_rate": 4.991825131973438e-07, "loss": 0.0427, "step": 9460 }, { "epoch": 2.58, "grad_norm": 1.5939347609083596, "learning_rate": 4.985408438684519e-07, "loss": 0.0451, "step": 9461 }, { "epoch": 2.58, "grad_norm": 1.4050774036195004, "learning_rate": 4.978995655786145e-07, "loss": 0.0409, "step": 9462 }, { "epoch": 2.58, "grad_norm": 1.758837100701156, "learning_rate": 4.972586783835348e-07, "loss": 0.052, "step": 9463 }, { "epoch": 2.58, "grad_norm": 1.5576675345820696, "learning_rate": 4.966181823388893e-07, "loss": 0.0486, "step": 9464 }, { "epoch": 2.58, "grad_norm": 1.4273600991779292, "learning_rate": 4.959780775003153e-07, "loss": 0.0418, "step": 9465 }, { "epoch": 2.58, "grad_norm": 1.9990415096647731, "learning_rate": 4.953383639234216e-07, "loss": 0.0573, "step": 9466 }, { "epoch": 2.58, "grad_norm": 1.4494208550684164, "learning_rate": 4.946990416637759e-07, "loss": 0.0403, "step": 9467 }, { "epoch": 2.58, "grad_norm": 1.4911959701832518, "learning_rate": 4.9406011077692e-07, "loss": 0.0466, "step": 9468 }, { "epoch": 2.59, "grad_norm": 1.4539111239228553, "learning_rate": 4.934215713183527e-07, "loss": 0.0341, "step": 9469 }, { "epoch": 2.59, "grad_norm": 1.5458588281804682, "learning_rate": 4.927834233435474e-07, "loss": 0.0368, "step": 9470 }, { "epoch": 2.59, "grad_norm": 1.4138629052321512, "learning_rate": 4.921456669079366e-07, "loss": 0.0379, "step": 9471 }, { "epoch": 2.59, "grad_norm": 1.323973871698604, "learning_rate": 4.915083020669248e-07, "loss": 0.041, "step": 9472 }, { "epoch": 2.59, "grad_norm": 1.5093223602738592, "learning_rate": 4.908713288758771e-07, "loss": 0.0437, "step": 9473 }, { "epoch": 2.59, "grad_norm": 1.5445211578531475, "learning_rate": 4.902347473901297e-07, "loss": 0.0407, "step": 9474 }, { "epoch": 2.59, "grad_norm": 1.4153931419642454, "learning_rate": 4.895985576649781e-07, "loss": 0.0407, "step": 9475 }, { "epoch": 2.59, "grad_norm": 1.6210309251156636, "learning_rate": 4.889627597556911e-07, "loss": 0.0518, "step": 9476 }, { "epoch": 2.59, "grad_norm": 1.6978262220180413, "learning_rate": 4.883273537174976e-07, "loss": 0.0562, "step": 9477 }, { "epoch": 2.59, "grad_norm": 1.5321382138704425, "learning_rate": 4.876923396055977e-07, "loss": 0.0441, "step": 9478 }, { "epoch": 2.59, "grad_norm": 1.2016520276614269, "learning_rate": 4.870577174751517e-07, "loss": 0.034, "step": 9479 }, { "epoch": 2.59, "grad_norm": 1.6641765635057333, "learning_rate": 4.864234873812928e-07, "loss": 0.0419, "step": 9480 }, { "epoch": 2.59, "grad_norm": 1.585848376751937, "learning_rate": 4.857896493791114e-07, "loss": 0.047, "step": 9481 }, { "epoch": 2.59, "grad_norm": 1.4335185339832035, "learning_rate": 4.851562035236723e-07, "loss": 0.0395, "step": 9482 }, { "epoch": 2.59, "grad_norm": 1.7254863440110364, "learning_rate": 4.845231498699998e-07, "loss": 0.0536, "step": 9483 }, { "epoch": 2.59, "grad_norm": 1.7456862602568157, "learning_rate": 4.8389048847309e-07, "loss": 0.0567, "step": 9484 }, { "epoch": 2.59, "grad_norm": 1.44295099503431, "learning_rate": 4.832582193878988e-07, "loss": 0.0465, "step": 9485 }, { "epoch": 2.59, "grad_norm": 1.6516397546748454, "learning_rate": 4.826263426693539e-07, "loss": 0.0448, "step": 9486 }, { "epoch": 2.59, "grad_norm": 1.42135690675478, "learning_rate": 4.819948583723427e-07, "loss": 0.0455, "step": 9487 }, { "epoch": 2.59, "grad_norm": 1.370516455354655, "learning_rate": 4.813637665517251e-07, "loss": 0.0458, "step": 9488 }, { "epoch": 2.59, "grad_norm": 1.4810766280952907, "learning_rate": 4.807330672623211e-07, "loss": 0.0396, "step": 9489 }, { "epoch": 2.59, "grad_norm": 1.368160412066376, "learning_rate": 4.80102760558921e-07, "loss": 0.042, "step": 9490 }, { "epoch": 2.59, "grad_norm": 1.5253654707617381, "learning_rate": 4.794728464962778e-07, "loss": 0.0413, "step": 9491 }, { "epoch": 2.59, "grad_norm": 1.56328917780785, "learning_rate": 4.788433251291141e-07, "loss": 0.0487, "step": 9492 }, { "epoch": 2.59, "grad_norm": 1.4845320735215057, "learning_rate": 4.782141965121129e-07, "loss": 0.0428, "step": 9493 }, { "epoch": 2.59, "grad_norm": 1.4400576891437717, "learning_rate": 4.775854606999286e-07, "loss": 0.0427, "step": 9494 }, { "epoch": 2.59, "grad_norm": 1.5139143426400339, "learning_rate": 4.769571177471771e-07, "loss": 0.0424, "step": 9495 }, { "epoch": 2.59, "grad_norm": 1.255037020951712, "learning_rate": 4.763291677084442e-07, "loss": 0.033, "step": 9496 }, { "epoch": 2.59, "grad_norm": 1.5055425892205259, "learning_rate": 4.757016106382778e-07, "loss": 0.0481, "step": 9497 }, { "epoch": 2.59, "grad_norm": 1.6102096304762528, "learning_rate": 4.750744465911955e-07, "loss": 0.0483, "step": 9498 }, { "epoch": 2.59, "grad_norm": 1.28193087951188, "learning_rate": 4.744476756216765e-07, "loss": 0.0427, "step": 9499 }, { "epoch": 2.59, "grad_norm": 1.6470718244890163, "learning_rate": 4.7382129778416885e-07, "loss": 0.0436, "step": 9500 }, { "epoch": 2.59, "grad_norm": 1.3114637889482257, "learning_rate": 4.7319531313308573e-07, "loss": 0.0363, "step": 9501 }, { "epoch": 2.59, "grad_norm": 1.4572003532748765, "learning_rate": 4.7256972172280646e-07, "loss": 0.0437, "step": 9502 }, { "epoch": 2.59, "grad_norm": 1.7873252348252229, "learning_rate": 4.7194452360767417e-07, "loss": 0.0474, "step": 9503 }, { "epoch": 2.59, "grad_norm": 1.3694109220424995, "learning_rate": 4.713197188420027e-07, "loss": 0.043, "step": 9504 }, { "epoch": 2.59, "grad_norm": 1.501250763209713, "learning_rate": 4.7069530748006463e-07, "loss": 0.0448, "step": 9505 }, { "epoch": 2.6, "grad_norm": 1.4919803786136137, "learning_rate": 4.7007128957610447e-07, "loss": 0.0437, "step": 9506 }, { "epoch": 2.6, "grad_norm": 1.4569439756482014, "learning_rate": 4.6944766518432936e-07, "loss": 0.0392, "step": 9507 }, { "epoch": 2.6, "grad_norm": 1.3668100599845112, "learning_rate": 4.6882443435891325e-07, "loss": 0.0381, "step": 9508 }, { "epoch": 2.6, "grad_norm": 1.5930763697735733, "learning_rate": 4.6820159715399715e-07, "loss": 0.0502, "step": 9509 }, { "epoch": 2.6, "grad_norm": 1.4620540062493423, "learning_rate": 4.6757915362368567e-07, "loss": 0.0488, "step": 9510 }, { "epoch": 2.6, "grad_norm": 1.5094373079756083, "learning_rate": 4.669571038220494e-07, "loss": 0.0415, "step": 9511 }, { "epoch": 2.6, "grad_norm": 1.3819065961209056, "learning_rate": 4.6633544780312565e-07, "loss": 0.04, "step": 9512 }, { "epoch": 2.6, "grad_norm": 1.4430010230852144, "learning_rate": 4.657141856209185e-07, "loss": 0.0468, "step": 9513 }, { "epoch": 2.6, "grad_norm": 1.5329100104241211, "learning_rate": 4.6509331732939476e-07, "loss": 0.0352, "step": 9514 }, { "epoch": 2.6, "grad_norm": 1.6388111210819711, "learning_rate": 4.6447284298249127e-07, "loss": 0.0461, "step": 9515 }, { "epoch": 2.6, "grad_norm": 1.6384250207853872, "learning_rate": 4.6385276263410604e-07, "loss": 0.0479, "step": 9516 }, { "epoch": 2.6, "grad_norm": 1.4805767987830838, "learning_rate": 4.6323307633810653e-07, "loss": 0.0461, "step": 9517 }, { "epoch": 2.6, "grad_norm": 1.5445254517366718, "learning_rate": 4.6261378414832304e-07, "loss": 0.0469, "step": 9518 }, { "epoch": 2.6, "grad_norm": 1.6100393614242214, "learning_rate": 4.619948861185547e-07, "loss": 0.0362, "step": 9519 }, { "epoch": 2.6, "grad_norm": 1.4934181876837194, "learning_rate": 4.6137638230256353e-07, "loss": 0.0409, "step": 9520 }, { "epoch": 2.6, "grad_norm": 1.3688147581514603, "learning_rate": 4.607582727540799e-07, "loss": 0.0375, "step": 9521 }, { "epoch": 2.6, "grad_norm": 1.350878062334665, "learning_rate": 4.601405575267981e-07, "loss": 0.0405, "step": 9522 }, { "epoch": 2.6, "grad_norm": 1.8483355241968689, "learning_rate": 4.5952323667437795e-07, "loss": 0.0516, "step": 9523 }, { "epoch": 2.6, "grad_norm": 1.5055110586907121, "learning_rate": 4.589063102504454e-07, "loss": 0.0453, "step": 9524 }, { "epoch": 2.6, "grad_norm": 1.291934826872168, "learning_rate": 4.582897783085949e-07, "loss": 0.0427, "step": 9525 }, { "epoch": 2.6, "grad_norm": 1.6701623552675646, "learning_rate": 4.576736409023813e-07, "loss": 0.0536, "step": 9526 }, { "epoch": 2.6, "grad_norm": 1.6483259650702111, "learning_rate": 4.570578980853302e-07, "loss": 0.0533, "step": 9527 }, { "epoch": 2.6, "grad_norm": 1.4582374512795266, "learning_rate": 4.564425499109304e-07, "loss": 0.0421, "step": 9528 }, { "epoch": 2.6, "grad_norm": 1.4289541497975753, "learning_rate": 4.5582759643263583e-07, "loss": 0.0364, "step": 9529 }, { "epoch": 2.6, "grad_norm": 1.4426341299269536, "learning_rate": 4.552130377038677e-07, "loss": 0.0427, "step": 9530 }, { "epoch": 2.6, "grad_norm": 1.5100662040164692, "learning_rate": 4.5459887377801273e-07, "loss": 0.0502, "step": 9531 }, { "epoch": 2.6, "grad_norm": 1.2817017970217974, "learning_rate": 4.5398510470842207e-07, "loss": 0.0428, "step": 9532 }, { "epoch": 2.6, "grad_norm": 1.5375537563431756, "learning_rate": 4.533717305484153e-07, "loss": 0.048, "step": 9533 }, { "epoch": 2.6, "grad_norm": 1.4918268622991384, "learning_rate": 4.5275875135127325e-07, "loss": 0.0489, "step": 9534 }, { "epoch": 2.6, "grad_norm": 1.5373561181011297, "learning_rate": 4.5214616717024764e-07, "loss": 0.0414, "step": 9535 }, { "epoch": 2.6, "grad_norm": 1.3558061623370292, "learning_rate": 4.5153397805855094e-07, "loss": 0.0424, "step": 9536 }, { "epoch": 2.6, "grad_norm": 1.3004160844422055, "learning_rate": 4.509221840693656e-07, "loss": 0.035, "step": 9537 }, { "epoch": 2.6, "grad_norm": 1.5876561997230576, "learning_rate": 4.503107852558358e-07, "loss": 0.0477, "step": 9538 }, { "epoch": 2.6, "grad_norm": 1.2031334073626196, "learning_rate": 4.4969978167107684e-07, "loss": 0.0396, "step": 9539 }, { "epoch": 2.6, "grad_norm": 1.5658193588597842, "learning_rate": 4.4908917336816237e-07, "loss": 0.0462, "step": 9540 }, { "epoch": 2.6, "grad_norm": 1.2056230739369738, "learning_rate": 4.484789604001377e-07, "loss": 0.0366, "step": 9541 }, { "epoch": 2.6, "grad_norm": 1.6164753919974024, "learning_rate": 4.478691428200099e-07, "loss": 0.0432, "step": 9542 }, { "epoch": 2.61, "grad_norm": 1.5479220592543506, "learning_rate": 4.472597206807561e-07, "loss": 0.0502, "step": 9543 }, { "epoch": 2.61, "grad_norm": 1.6689289070238862, "learning_rate": 4.466506940353138e-07, "loss": 0.0506, "step": 9544 }, { "epoch": 2.61, "grad_norm": 1.4197064964887574, "learning_rate": 4.460420629365919e-07, "loss": 0.0422, "step": 9545 }, { "epoch": 2.61, "grad_norm": 1.384649335660778, "learning_rate": 4.454338274374587e-07, "loss": 0.0416, "step": 9546 }, { "epoch": 2.61, "grad_norm": 1.3782718443648383, "learning_rate": 4.448259875907523e-07, "loss": 0.0378, "step": 9547 }, { "epoch": 2.61, "grad_norm": 1.614217968265468, "learning_rate": 4.4421854344927575e-07, "loss": 0.0506, "step": 9548 }, { "epoch": 2.61, "grad_norm": 1.290495904666864, "learning_rate": 4.4361149506579716e-07, "loss": 0.0382, "step": 9549 }, { "epoch": 2.61, "grad_norm": 1.7277424937807064, "learning_rate": 4.4300484249304996e-07, "loss": 0.0503, "step": 9550 }, { "epoch": 2.61, "grad_norm": 1.335265947730024, "learning_rate": 4.4239858578373597e-07, "loss": 0.044, "step": 9551 }, { "epoch": 2.61, "grad_norm": 1.699859481302849, "learning_rate": 4.4179272499051686e-07, "loss": 0.0493, "step": 9552 }, { "epoch": 2.61, "grad_norm": 1.4255126742501394, "learning_rate": 4.411872601660261e-07, "loss": 0.0471, "step": 9553 }, { "epoch": 2.61, "grad_norm": 1.6536891684380957, "learning_rate": 4.4058219136285774e-07, "loss": 0.0526, "step": 9554 }, { "epoch": 2.61, "grad_norm": 1.4532082337850223, "learning_rate": 4.399775186335764e-07, "loss": 0.0425, "step": 9555 }, { "epoch": 2.61, "grad_norm": 1.469013044530932, "learning_rate": 4.393732420307073e-07, "loss": 0.0435, "step": 9556 }, { "epoch": 2.61, "grad_norm": 1.5723628394817593, "learning_rate": 4.3876936160674623e-07, "loss": 0.0455, "step": 9557 }, { "epoch": 2.61, "grad_norm": 1.5266400676304106, "learning_rate": 4.381658774141484e-07, "loss": 0.0478, "step": 9558 }, { "epoch": 2.61, "grad_norm": 1.3870519485012351, "learning_rate": 4.375627895053408e-07, "loss": 0.0474, "step": 9559 }, { "epoch": 2.61, "grad_norm": 1.4271746261598763, "learning_rate": 4.3696009793271213e-07, "loss": 0.0457, "step": 9560 }, { "epoch": 2.61, "grad_norm": 1.5415089083127158, "learning_rate": 4.363578027486187e-07, "loss": 0.0413, "step": 9561 }, { "epoch": 2.61, "grad_norm": 1.5119009632183478, "learning_rate": 4.3575590400538046e-07, "loss": 0.0456, "step": 9562 }, { "epoch": 2.61, "grad_norm": 1.6478816027790337, "learning_rate": 4.351544017552861e-07, "loss": 0.0572, "step": 9563 }, { "epoch": 2.61, "grad_norm": 1.4489145353676427, "learning_rate": 4.3455329605058436e-07, "loss": 0.0449, "step": 9564 }, { "epoch": 2.61, "grad_norm": 1.6815556982251532, "learning_rate": 4.339525869434963e-07, "loss": 0.0465, "step": 9565 }, { "epoch": 2.61, "grad_norm": 1.2571063108565679, "learning_rate": 4.333522744862023e-07, "loss": 0.0369, "step": 9566 }, { "epoch": 2.61, "grad_norm": 1.3178132071690807, "learning_rate": 4.327523587308535e-07, "loss": 0.0425, "step": 9567 }, { "epoch": 2.61, "grad_norm": 1.7437975616228967, "learning_rate": 4.321528397295621e-07, "loss": 0.0476, "step": 9568 }, { "epoch": 2.61, "grad_norm": 1.4169624417733622, "learning_rate": 4.3155371753441146e-07, "loss": 0.0447, "step": 9569 }, { "epoch": 2.61, "grad_norm": 1.5134645657278236, "learning_rate": 4.309549921974421e-07, "loss": 0.0451, "step": 9570 }, { "epoch": 2.61, "grad_norm": 1.3311828437239435, "learning_rate": 4.3035666377066855e-07, "loss": 0.0404, "step": 9571 }, { "epoch": 2.61, "grad_norm": 1.322036452766648, "learning_rate": 4.2975873230606536e-07, "loss": 0.04, "step": 9572 }, { "epoch": 2.61, "grad_norm": 1.501500500641906, "learning_rate": 4.291611978555765e-07, "loss": 0.0453, "step": 9573 }, { "epoch": 2.61, "grad_norm": 1.3516762879611213, "learning_rate": 4.285640604711067e-07, "loss": 0.0436, "step": 9574 }, { "epoch": 2.61, "grad_norm": 1.350929535121936, "learning_rate": 4.279673202045326e-07, "loss": 0.0411, "step": 9575 }, { "epoch": 2.61, "grad_norm": 1.6777444177088288, "learning_rate": 4.2737097710768837e-07, "loss": 0.054, "step": 9576 }, { "epoch": 2.61, "grad_norm": 1.4376696621562164, "learning_rate": 4.2677503123238094e-07, "loss": 0.0451, "step": 9577 }, { "epoch": 2.61, "grad_norm": 1.632608926454742, "learning_rate": 4.261794826303783e-07, "loss": 0.0493, "step": 9578 }, { "epoch": 2.62, "grad_norm": 1.4420829343145307, "learning_rate": 4.2558433135341694e-07, "loss": 0.0455, "step": 9579 }, { "epoch": 2.62, "grad_norm": 1.6448636713443647, "learning_rate": 4.249895774531948e-07, "loss": 0.0518, "step": 9580 }, { "epoch": 2.62, "grad_norm": 1.4376438838887593, "learning_rate": 4.2439522098138173e-07, "loss": 0.0434, "step": 9581 }, { "epoch": 2.62, "grad_norm": 1.477456807762911, "learning_rate": 4.238012619896048e-07, "loss": 0.0474, "step": 9582 }, { "epoch": 2.62, "grad_norm": 1.3350933413206312, "learning_rate": 4.232077005294638e-07, "loss": 0.0352, "step": 9583 }, { "epoch": 2.62, "grad_norm": 1.3643065807432677, "learning_rate": 4.226145366525192e-07, "loss": 0.0383, "step": 9584 }, { "epoch": 2.62, "grad_norm": 1.4225632854187988, "learning_rate": 4.2202177041030025e-07, "loss": 0.0382, "step": 9585 }, { "epoch": 2.62, "grad_norm": 1.4153227719599126, "learning_rate": 4.2142940185429915e-07, "loss": 0.0391, "step": 9586 }, { "epoch": 2.62, "grad_norm": 1.6851671514111863, "learning_rate": 4.208374310359764e-07, "loss": 0.0497, "step": 9587 }, { "epoch": 2.62, "grad_norm": 1.3968557618100055, "learning_rate": 4.202458580067531e-07, "loss": 0.0445, "step": 9588 }, { "epoch": 2.62, "grad_norm": 1.5098432942304885, "learning_rate": 4.1965468281802145e-07, "loss": 0.0444, "step": 9589 }, { "epoch": 2.62, "grad_norm": 1.4841087959216106, "learning_rate": 4.190639055211349e-07, "loss": 0.0403, "step": 9590 }, { "epoch": 2.62, "grad_norm": 1.5362823199359597, "learning_rate": 4.18473526167415e-07, "loss": 0.0438, "step": 9591 }, { "epoch": 2.62, "grad_norm": 1.2829272993133471, "learning_rate": 4.1788354480814696e-07, "loss": 0.0398, "step": 9592 }, { "epoch": 2.62, "grad_norm": 1.441937812614088, "learning_rate": 4.1729396149458367e-07, "loss": 0.0453, "step": 9593 }, { "epoch": 2.62, "grad_norm": 1.3965150439936667, "learning_rate": 4.167047762779391e-07, "loss": 0.0422, "step": 9594 }, { "epoch": 2.62, "grad_norm": 1.4641677120384513, "learning_rate": 4.16115989209398e-07, "loss": 0.0471, "step": 9595 }, { "epoch": 2.62, "grad_norm": 1.4624805999265817, "learning_rate": 4.155276003401054e-07, "loss": 0.0413, "step": 9596 }, { "epoch": 2.62, "grad_norm": 1.3503470474501447, "learning_rate": 4.149396097211772e-07, "loss": 0.0386, "step": 9597 }, { "epoch": 2.62, "grad_norm": 1.7506135868904837, "learning_rate": 4.1435201740368914e-07, "loss": 0.0489, "step": 9598 }, { "epoch": 2.62, "grad_norm": 1.2793660070393602, "learning_rate": 4.137648234386871e-07, "loss": 0.038, "step": 9599 }, { "epoch": 2.62, "grad_norm": 1.5572386084438272, "learning_rate": 4.1317802787717963e-07, "loss": 0.0485, "step": 9600 }, { "epoch": 2.62, "grad_norm": 1.4277414060374767, "learning_rate": 4.1259163077014e-07, "loss": 0.0427, "step": 9601 }, { "epoch": 2.62, "grad_norm": 1.4858533183587261, "learning_rate": 4.120056321685101e-07, "loss": 0.044, "step": 9602 }, { "epoch": 2.62, "grad_norm": 1.384292873930322, "learning_rate": 4.114200321231937e-07, "loss": 0.0465, "step": 9603 }, { "epoch": 2.62, "grad_norm": 1.5467212615041195, "learning_rate": 4.108348306850629e-07, "loss": 0.0472, "step": 9604 }, { "epoch": 2.62, "grad_norm": 1.4713975399676553, "learning_rate": 4.1025002790495317e-07, "loss": 0.0371, "step": 9605 }, { "epoch": 2.62, "grad_norm": 1.622189308365567, "learning_rate": 4.09665623833666e-07, "loss": 0.0456, "step": 9606 }, { "epoch": 2.62, "grad_norm": 1.4034371176090872, "learning_rate": 4.0908161852196706e-07, "loss": 0.0465, "step": 9607 }, { "epoch": 2.62, "grad_norm": 1.3138294371510686, "learning_rate": 4.0849801202059113e-07, "loss": 0.039, "step": 9608 }, { "epoch": 2.62, "grad_norm": 1.833006646548874, "learning_rate": 4.079148043802328e-07, "loss": 0.0493, "step": 9609 }, { "epoch": 2.62, "grad_norm": 1.318008375752975, "learning_rate": 4.0733199565155814e-07, "loss": 0.0403, "step": 9610 }, { "epoch": 2.62, "grad_norm": 1.3939835669538023, "learning_rate": 4.067495858851922e-07, "loss": 0.0467, "step": 9611 }, { "epoch": 2.62, "grad_norm": 1.5090937273351503, "learning_rate": 4.0616757513173123e-07, "loss": 0.0385, "step": 9612 }, { "epoch": 2.62, "grad_norm": 1.234917698747976, "learning_rate": 4.055859634417314e-07, "loss": 0.0374, "step": 9613 }, { "epoch": 2.62, "grad_norm": 1.3393417058968564, "learning_rate": 4.050047508657201e-07, "loss": 0.0403, "step": 9614 }, { "epoch": 2.62, "grad_norm": 1.2797919141688916, "learning_rate": 4.0442393745418415e-07, "loss": 0.0368, "step": 9615 }, { "epoch": 2.63, "grad_norm": 1.5212888060663954, "learning_rate": 4.0384352325758104e-07, "loss": 0.0462, "step": 9616 }, { "epoch": 2.63, "grad_norm": 1.3996560028828897, "learning_rate": 4.0326350832632865e-07, "loss": 0.0373, "step": 9617 }, { "epoch": 2.63, "grad_norm": 1.5057362796375684, "learning_rate": 4.02683892710814e-07, "loss": 0.0376, "step": 9618 }, { "epoch": 2.63, "grad_norm": 1.4863508628178341, "learning_rate": 4.0210467646138674e-07, "loss": 0.0452, "step": 9619 }, { "epoch": 2.63, "grad_norm": 1.5403708190458993, "learning_rate": 4.0152585962836444e-07, "loss": 0.0488, "step": 9620 }, { "epoch": 2.63, "grad_norm": 1.3672349842028424, "learning_rate": 4.009474422620269e-07, "loss": 0.0448, "step": 9621 }, { "epoch": 2.63, "grad_norm": 1.3738632875135273, "learning_rate": 4.0036942441262385e-07, "loss": 0.0392, "step": 9622 }, { "epoch": 2.63, "grad_norm": 1.4511639709781454, "learning_rate": 3.997918061303635e-07, "loss": 0.0438, "step": 9623 }, { "epoch": 2.63, "grad_norm": 1.658930113970053, "learning_rate": 3.992145874654263e-07, "loss": 0.0407, "step": 9624 }, { "epoch": 2.63, "grad_norm": 1.400134270401821, "learning_rate": 3.9863776846795265e-07, "loss": 0.0387, "step": 9625 }, { "epoch": 2.63, "grad_norm": 1.235493170328634, "learning_rate": 3.980613491880525e-07, "loss": 0.0339, "step": 9626 }, { "epoch": 2.63, "grad_norm": 1.3468224840469483, "learning_rate": 3.974853296757969e-07, "loss": 0.0406, "step": 9627 }, { "epoch": 2.63, "grad_norm": 1.7401560436294927, "learning_rate": 3.9690970998122745e-07, "loss": 0.0432, "step": 9628 }, { "epoch": 2.63, "grad_norm": 1.5139068090801995, "learning_rate": 3.963344901543437e-07, "loss": 0.0491, "step": 9629 }, { "epoch": 2.63, "grad_norm": 1.2413803350643091, "learning_rate": 3.957596702451183e-07, "loss": 0.0421, "step": 9630 }, { "epoch": 2.63, "grad_norm": 1.2187718116857942, "learning_rate": 3.9518525030348307e-07, "loss": 0.0357, "step": 9631 }, { "epoch": 2.63, "grad_norm": 1.3373550799879357, "learning_rate": 3.9461123037933923e-07, "loss": 0.0393, "step": 9632 }, { "epoch": 2.63, "grad_norm": 1.6875917024728762, "learning_rate": 3.940376105225496e-07, "loss": 0.0437, "step": 9633 }, { "epoch": 2.63, "grad_norm": 1.595828548429279, "learning_rate": 3.934643907829477e-07, "loss": 0.0434, "step": 9634 }, { "epoch": 2.63, "grad_norm": 1.3621943665747567, "learning_rate": 3.9289157121032485e-07, "loss": 0.0391, "step": 9635 }, { "epoch": 2.63, "grad_norm": 1.5098424185350363, "learning_rate": 3.9231915185444337e-07, "loss": 0.0432, "step": 9636 }, { "epoch": 2.63, "grad_norm": 1.6312294134192091, "learning_rate": 3.9174713276502853e-07, "loss": 0.0409, "step": 9637 }, { "epoch": 2.63, "grad_norm": 1.3645249435977806, "learning_rate": 3.911755139917722e-07, "loss": 0.0454, "step": 9638 }, { "epoch": 2.63, "grad_norm": 1.6431027718609474, "learning_rate": 3.906042955843298e-07, "loss": 0.0478, "step": 9639 }, { "epoch": 2.63, "grad_norm": 1.4013439191510617, "learning_rate": 3.900334775923237e-07, "loss": 0.0454, "step": 9640 }, { "epoch": 2.63, "grad_norm": 1.5890009361789863, "learning_rate": 3.894630600653382e-07, "loss": 0.0446, "step": 9641 }, { "epoch": 2.63, "grad_norm": 1.589326027576601, "learning_rate": 3.888930430529275e-07, "loss": 0.0438, "step": 9642 }, { "epoch": 2.63, "grad_norm": 1.4258426411561043, "learning_rate": 3.883234266046071e-07, "loss": 0.0386, "step": 9643 }, { "epoch": 2.63, "grad_norm": 1.39026434914808, "learning_rate": 3.8775421076986066e-07, "loss": 0.0403, "step": 9644 }, { "epoch": 2.63, "grad_norm": 1.3744058080541643, "learning_rate": 3.871853955981336e-07, "loss": 0.0394, "step": 9645 }, { "epoch": 2.63, "grad_norm": 1.3062424475989316, "learning_rate": 3.866169811388415e-07, "loss": 0.039, "step": 9646 }, { "epoch": 2.63, "grad_norm": 1.482074727659546, "learning_rate": 3.8604896744135923e-07, "loss": 0.0504, "step": 9647 }, { "epoch": 2.63, "grad_norm": 1.5515775600029753, "learning_rate": 3.8548135455503176e-07, "loss": 0.0503, "step": 9648 }, { "epoch": 2.63, "grad_norm": 1.570176912303616, "learning_rate": 3.849141425291658e-07, "loss": 0.0486, "step": 9649 }, { "epoch": 2.63, "grad_norm": 1.546208018880304, "learning_rate": 3.843473314130358e-07, "loss": 0.0433, "step": 9650 }, { "epoch": 2.63, "grad_norm": 1.3709802341085098, "learning_rate": 3.837809212558796e-07, "loss": 0.0367, "step": 9651 }, { "epoch": 2.63, "grad_norm": 1.2039069547037646, "learning_rate": 3.832149121069029e-07, "loss": 0.0345, "step": 9652 }, { "epoch": 2.64, "grad_norm": 1.6244670252580329, "learning_rate": 3.8264930401527123e-07, "loss": 0.045, "step": 9653 }, { "epoch": 2.64, "grad_norm": 1.5845169308905884, "learning_rate": 3.8208409703012153e-07, "loss": 0.0491, "step": 9654 }, { "epoch": 2.64, "grad_norm": 1.2980211178456464, "learning_rate": 3.815192912005505e-07, "loss": 0.0386, "step": 9655 }, { "epoch": 2.64, "grad_norm": 1.5798748475729993, "learning_rate": 3.809548865756246e-07, "loss": 0.0486, "step": 9656 }, { "epoch": 2.64, "grad_norm": 1.6081215345888544, "learning_rate": 3.803908832043718e-07, "loss": 0.0444, "step": 9657 }, { "epoch": 2.64, "grad_norm": 1.688638157637924, "learning_rate": 3.7982728113578946e-07, "loss": 0.0526, "step": 9658 }, { "epoch": 2.64, "grad_norm": 1.470612772243646, "learning_rate": 3.7926408041883355e-07, "loss": 0.0481, "step": 9659 }, { "epoch": 2.64, "grad_norm": 1.4298932739818198, "learning_rate": 3.7870128110243155e-07, "loss": 0.0417, "step": 9660 }, { "epoch": 2.64, "grad_norm": 1.1776982384922046, "learning_rate": 3.7813888323547155e-07, "loss": 0.0311, "step": 9661 }, { "epoch": 2.64, "grad_norm": 1.5898361754061843, "learning_rate": 3.7757688686681117e-07, "loss": 0.0469, "step": 9662 }, { "epoch": 2.64, "grad_norm": 1.2990397451577733, "learning_rate": 3.7701529204526856e-07, "loss": 0.0351, "step": 9663 }, { "epoch": 2.64, "grad_norm": 1.4558233502207352, "learning_rate": 3.7645409881963133e-07, "loss": 0.042, "step": 9664 }, { "epoch": 2.64, "grad_norm": 1.3717147224539585, "learning_rate": 3.7589330723864724e-07, "loss": 0.0392, "step": 9665 }, { "epoch": 2.64, "grad_norm": 1.3737318122822282, "learning_rate": 3.753329173510345e-07, "loss": 0.0432, "step": 9666 }, { "epoch": 2.64, "grad_norm": 1.5664873477638628, "learning_rate": 3.7477292920547134e-07, "loss": 0.0431, "step": 9667 }, { "epoch": 2.64, "grad_norm": 1.505345058074906, "learning_rate": 3.7421334285060617e-07, "loss": 0.0443, "step": 9668 }, { "epoch": 2.64, "grad_norm": 1.7109639389418407, "learning_rate": 3.736541583350473e-07, "loss": 0.0555, "step": 9669 }, { "epoch": 2.64, "grad_norm": 1.5779923672220566, "learning_rate": 3.730953757073741e-07, "loss": 0.0471, "step": 9670 }, { "epoch": 2.64, "grad_norm": 1.625205631063717, "learning_rate": 3.7253699501612394e-07, "loss": 0.0457, "step": 9671 }, { "epoch": 2.64, "grad_norm": 1.512805026027645, "learning_rate": 3.719790163098058e-07, "loss": 0.046, "step": 9672 }, { "epoch": 2.64, "grad_norm": 1.4187021956642465, "learning_rate": 3.7142143963688927e-07, "loss": 0.0447, "step": 9673 }, { "epoch": 2.64, "grad_norm": 1.6555273882899562, "learning_rate": 3.7086426504581166e-07, "loss": 0.0489, "step": 9674 }, { "epoch": 2.64, "grad_norm": 1.6604519275628469, "learning_rate": 3.7030749258497365e-07, "loss": 0.0583, "step": 9675 }, { "epoch": 2.64, "grad_norm": 1.513595617640187, "learning_rate": 3.697511223027439e-07, "loss": 0.0433, "step": 9676 }, { "epoch": 2.64, "grad_norm": 1.3531490921404383, "learning_rate": 3.6919515424745035e-07, "loss": 0.0415, "step": 9677 }, { "epoch": 2.64, "grad_norm": 1.5663841629182775, "learning_rate": 3.6863958846739213e-07, "loss": 0.046, "step": 9678 }, { "epoch": 2.64, "grad_norm": 1.3475148826851961, "learning_rate": 3.6808442501083007e-07, "loss": 0.0356, "step": 9679 }, { "epoch": 2.64, "grad_norm": 1.4235683745953958, "learning_rate": 3.675296639259912e-07, "loss": 0.0439, "step": 9680 }, { "epoch": 2.64, "grad_norm": 1.2864579277767414, "learning_rate": 3.6697530526106697e-07, "loss": 0.0417, "step": 9681 }, { "epoch": 2.64, "grad_norm": 1.5248006513046146, "learning_rate": 3.66421349064216e-07, "loss": 0.0474, "step": 9682 }, { "epoch": 2.64, "grad_norm": 1.6962991151385718, "learning_rate": 3.6586779538355656e-07, "loss": 0.0423, "step": 9683 }, { "epoch": 2.64, "grad_norm": 1.4313109310666983, "learning_rate": 3.6531464426717843e-07, "loss": 0.0497, "step": 9684 }, { "epoch": 2.64, "grad_norm": 1.5021430581608235, "learning_rate": 3.6476189576313215e-07, "loss": 0.0399, "step": 9685 }, { "epoch": 2.64, "grad_norm": 1.5188652925885526, "learning_rate": 3.6420954991943537e-07, "loss": 0.0476, "step": 9686 }, { "epoch": 2.64, "grad_norm": 1.5099403700176421, "learning_rate": 3.636576067840697e-07, "loss": 0.0435, "step": 9687 }, { "epoch": 2.64, "grad_norm": 1.2953869764399193, "learning_rate": 3.631060664049824e-07, "loss": 0.036, "step": 9688 }, { "epoch": 2.65, "grad_norm": 10.135637780544938, "learning_rate": 3.6255492883008446e-07, "loss": 0.0839, "step": 9689 }, { "epoch": 2.65, "grad_norm": 1.297224895146072, "learning_rate": 3.620041941072544e-07, "loss": 0.0333, "step": 9690 }, { "epoch": 2.65, "grad_norm": 1.403688915026389, "learning_rate": 3.614538622843328e-07, "loss": 0.04, "step": 9691 }, { "epoch": 2.65, "grad_norm": 1.428840921297277, "learning_rate": 3.609039334091269e-07, "loss": 0.0446, "step": 9692 }, { "epoch": 2.65, "grad_norm": 1.6866592808076206, "learning_rate": 3.6035440752941075e-07, "loss": 0.0441, "step": 9693 }, { "epoch": 2.65, "grad_norm": 1.6774085384434785, "learning_rate": 3.598052846929184e-07, "loss": 0.0495, "step": 9694 }, { "epoch": 2.65, "grad_norm": 1.6489876335898097, "learning_rate": 3.592565649473534e-07, "loss": 0.0521, "step": 9695 }, { "epoch": 2.65, "grad_norm": 1.4852908212474134, "learning_rate": 3.58708248340382e-07, "loss": 0.0481, "step": 9696 }, { "epoch": 2.65, "grad_norm": 1.7915707820764906, "learning_rate": 3.581603349196372e-07, "loss": 0.0474, "step": 9697 }, { "epoch": 2.65, "grad_norm": 1.3782621004698132, "learning_rate": 3.576128247327143e-07, "loss": 0.0368, "step": 9698 }, { "epoch": 2.65, "grad_norm": 1.3292731299290403, "learning_rate": 3.57065717827178e-07, "loss": 0.0393, "step": 9699 }, { "epoch": 2.65, "grad_norm": 1.6146172252114868, "learning_rate": 3.565190142505515e-07, "loss": 0.0494, "step": 9700 }, { "epoch": 2.65, "grad_norm": 1.2775572420920787, "learning_rate": 3.5597271405032887e-07, "loss": 0.0325, "step": 9701 }, { "epoch": 2.65, "grad_norm": 1.5191835708494437, "learning_rate": 3.5542681727396613e-07, "loss": 0.0412, "step": 9702 }, { "epoch": 2.65, "grad_norm": 1.510335564061284, "learning_rate": 3.548813239688853e-07, "loss": 0.044, "step": 9703 }, { "epoch": 2.65, "grad_norm": 1.854392936934028, "learning_rate": 3.54336234182473e-07, "loss": 0.0517, "step": 9704 }, { "epoch": 2.65, "grad_norm": 1.4958541756591652, "learning_rate": 3.537915479620818e-07, "loss": 0.0469, "step": 9705 }, { "epoch": 2.65, "grad_norm": 1.3512280113677804, "learning_rate": 3.532472653550262e-07, "loss": 0.0423, "step": 9706 }, { "epoch": 2.65, "grad_norm": 1.5478550426148945, "learning_rate": 3.5270338640858993e-07, "loss": 0.0472, "step": 9707 }, { "epoch": 2.65, "grad_norm": 1.4953008899616966, "learning_rate": 3.521599111700169e-07, "loss": 0.039, "step": 9708 }, { "epoch": 2.65, "grad_norm": 1.7281674031853826, "learning_rate": 3.5161683968652104e-07, "loss": 0.053, "step": 9709 }, { "epoch": 2.65, "grad_norm": 1.6259853940180455, "learning_rate": 3.5107417200527625e-07, "loss": 0.0421, "step": 9710 }, { "epoch": 2.65, "grad_norm": 1.6828368280114945, "learning_rate": 3.5053190817342707e-07, "loss": 0.0535, "step": 9711 }, { "epoch": 2.65, "grad_norm": 1.3286215114643205, "learning_rate": 3.499900482380758e-07, "loss": 0.0414, "step": 9712 }, { "epoch": 2.65, "grad_norm": 1.539048145541395, "learning_rate": 3.4944859224629645e-07, "loss": 0.0471, "step": 9713 }, { "epoch": 2.65, "grad_norm": 1.4901233155701168, "learning_rate": 3.4890754024512254e-07, "loss": 0.043, "step": 9714 }, { "epoch": 2.65, "grad_norm": 1.5051511939728763, "learning_rate": 3.4836689228155697e-07, "loss": 0.0462, "step": 9715 }, { "epoch": 2.65, "grad_norm": 1.377644116671599, "learning_rate": 3.4782664840256387e-07, "loss": 0.0368, "step": 9716 }, { "epoch": 2.65, "grad_norm": 1.2006219765145378, "learning_rate": 3.472868086550768e-07, "loss": 0.0358, "step": 9717 }, { "epoch": 2.65, "grad_norm": 1.2678294845884381, "learning_rate": 3.4674737308598714e-07, "loss": 0.0376, "step": 9718 }, { "epoch": 2.65, "grad_norm": 1.3929636370664398, "learning_rate": 3.4620834174215856e-07, "loss": 0.0412, "step": 9719 }, { "epoch": 2.65, "grad_norm": 1.4893405556902184, "learning_rate": 3.4566971467041463e-07, "loss": 0.047, "step": 9720 }, { "epoch": 2.65, "grad_norm": 1.5853225786233056, "learning_rate": 3.4513149191754635e-07, "loss": 0.0524, "step": 9721 }, { "epoch": 2.65, "grad_norm": 1.6986716196288256, "learning_rate": 3.4459367353030846e-07, "loss": 0.0506, "step": 9722 }, { "epoch": 2.65, "grad_norm": 1.5638335667719534, "learning_rate": 3.4405625955542254e-07, "loss": 0.0375, "step": 9723 }, { "epoch": 2.65, "grad_norm": 1.411961688956543, "learning_rate": 3.4351925003957065e-07, "loss": 0.0413, "step": 9724 }, { "epoch": 2.65, "grad_norm": 1.3626148803904228, "learning_rate": 3.4298264502940436e-07, "loss": 0.047, "step": 9725 }, { "epoch": 2.66, "grad_norm": 1.5640031429974925, "learning_rate": 3.42446444571537e-07, "loss": 0.0526, "step": 9726 }, { "epoch": 2.66, "grad_norm": 1.3401156168510278, "learning_rate": 3.419106487125495e-07, "loss": 0.0389, "step": 9727 }, { "epoch": 2.66, "grad_norm": 1.4997626914865505, "learning_rate": 3.4137525749898425e-07, "loss": 0.044, "step": 9728 }, { "epoch": 2.66, "grad_norm": 1.3883680349965608, "learning_rate": 3.408402709773534e-07, "loss": 0.0438, "step": 9729 }, { "epoch": 2.66, "grad_norm": 1.2925281710706147, "learning_rate": 3.4030568919412697e-07, "loss": 0.0425, "step": 9730 }, { "epoch": 2.66, "grad_norm": 1.272416215838726, "learning_rate": 3.397715121957468e-07, "loss": 0.0377, "step": 9731 }, { "epoch": 2.66, "grad_norm": 1.363483812621177, "learning_rate": 3.3923774002861454e-07, "loss": 0.0393, "step": 9732 }, { "epoch": 2.66, "grad_norm": 1.3283657593865004, "learning_rate": 3.387043727391004e-07, "loss": 0.0362, "step": 9733 }, { "epoch": 2.66, "grad_norm": 1.2920712417065263, "learning_rate": 3.3817141037353565e-07, "loss": 0.0378, "step": 9734 }, { "epoch": 2.66, "grad_norm": 1.6409444840690268, "learning_rate": 3.3763885297822153e-07, "loss": 0.0557, "step": 9735 }, { "epoch": 2.66, "grad_norm": 1.8048520824904202, "learning_rate": 3.3710670059941777e-07, "loss": 0.0484, "step": 9736 }, { "epoch": 2.66, "grad_norm": 1.476179626598445, "learning_rate": 3.36574953283354e-07, "loss": 0.0486, "step": 9737 }, { "epoch": 2.66, "grad_norm": 1.4594338579927015, "learning_rate": 3.3604361107622106e-07, "loss": 0.0431, "step": 9738 }, { "epoch": 2.66, "grad_norm": 1.2122888322147463, "learning_rate": 3.3551267402417874e-07, "loss": 0.0351, "step": 9739 }, { "epoch": 2.66, "grad_norm": 2.5503853545996096, "learning_rate": 3.349821421733468e-07, "loss": 0.0387, "step": 9740 }, { "epoch": 2.66, "grad_norm": 1.342768497659534, "learning_rate": 3.34452015569815e-07, "loss": 0.0421, "step": 9741 }, { "epoch": 2.66, "grad_norm": 1.3232370565035567, "learning_rate": 3.339222942596321e-07, "loss": 0.0371, "step": 9742 }, { "epoch": 2.66, "grad_norm": 1.9085039908039205, "learning_rate": 3.333929782888168e-07, "loss": 0.05, "step": 9743 }, { "epoch": 2.66, "grad_norm": 1.4828854416236226, "learning_rate": 3.3286406770334843e-07, "loss": 0.0423, "step": 9744 }, { "epoch": 2.66, "grad_norm": 1.5442392845080655, "learning_rate": 3.323355625491759e-07, "loss": 0.0513, "step": 9745 }, { "epoch": 2.66, "grad_norm": 1.3520004766054956, "learning_rate": 3.318074628722079e-07, "loss": 0.0383, "step": 9746 }, { "epoch": 2.66, "grad_norm": 1.5964540815010628, "learning_rate": 3.312797687183217e-07, "loss": 0.0582, "step": 9747 }, { "epoch": 2.66, "grad_norm": 1.56785560375489, "learning_rate": 3.3075248013335614e-07, "loss": 0.0508, "step": 9748 }, { "epoch": 2.66, "grad_norm": 1.614260796169554, "learning_rate": 3.30225597163118e-07, "loss": 0.0518, "step": 9749 }, { "epoch": 2.66, "grad_norm": 1.5694730490528126, "learning_rate": 3.2969911985337556e-07, "loss": 0.0451, "step": 9750 }, { "epoch": 2.66, "grad_norm": 1.7683488225591102, "learning_rate": 3.2917304824986505e-07, "loss": 0.0561, "step": 9751 }, { "epoch": 2.66, "grad_norm": 1.4454028560418148, "learning_rate": 3.2864738239828553e-07, "loss": 0.0418, "step": 9752 }, { "epoch": 2.66, "grad_norm": 1.5873321412313446, "learning_rate": 3.281221223443026e-07, "loss": 0.0452, "step": 9753 }, { "epoch": 2.66, "grad_norm": 1.533996345376737, "learning_rate": 3.275972681335421e-07, "loss": 0.0366, "step": 9754 }, { "epoch": 2.66, "grad_norm": 1.4070084597921413, "learning_rate": 3.2707281981160075e-07, "loss": 0.0414, "step": 9755 }, { "epoch": 2.66, "grad_norm": 1.1728929179470517, "learning_rate": 3.26548777424035e-07, "loss": 0.0315, "step": 9756 }, { "epoch": 2.66, "grad_norm": 1.392700248420635, "learning_rate": 3.2602514101637004e-07, "loss": 0.0431, "step": 9757 }, { "epoch": 2.66, "grad_norm": 1.410957420236946, "learning_rate": 3.255019106340923e-07, "loss": 0.0426, "step": 9758 }, { "epoch": 2.66, "grad_norm": 1.4396989430568352, "learning_rate": 3.249790863226565e-07, "loss": 0.0507, "step": 9759 }, { "epoch": 2.66, "grad_norm": 1.6143241200088134, "learning_rate": 3.244566681274769e-07, "loss": 0.0507, "step": 9760 }, { "epoch": 2.66, "grad_norm": 1.7198136483955655, "learning_rate": 3.2393465609393825e-07, "loss": 0.0536, "step": 9761 }, { "epoch": 2.67, "grad_norm": 1.398777095462013, "learning_rate": 3.23413050267386e-07, "loss": 0.0429, "step": 9762 }, { "epoch": 2.67, "grad_norm": 1.3250820426437029, "learning_rate": 3.2289185069313277e-07, "loss": 0.0392, "step": 9763 }, { "epoch": 2.67, "grad_norm": 1.3451370940852598, "learning_rate": 3.2237105741645456e-07, "loss": 0.0423, "step": 9764 }, { "epoch": 2.67, "grad_norm": 1.599758765473879, "learning_rate": 3.2185067048259245e-07, "loss": 0.0429, "step": 9765 }, { "epoch": 2.67, "grad_norm": 1.6036227106949597, "learning_rate": 3.213306899367508e-07, "loss": 0.0483, "step": 9766 }, { "epoch": 2.67, "grad_norm": 1.482535618187445, "learning_rate": 3.208111158241023e-07, "loss": 0.0426, "step": 9767 }, { "epoch": 2.67, "grad_norm": 1.4123398585691431, "learning_rate": 3.2029194818977984e-07, "loss": 0.0404, "step": 9768 }, { "epoch": 2.67, "grad_norm": 1.421548355183196, "learning_rate": 3.1977318707888506e-07, "loss": 0.04, "step": 9769 }, { "epoch": 2.67, "grad_norm": 1.6064524386653583, "learning_rate": 3.1925483253648135e-07, "loss": 0.0441, "step": 9770 }, { "epoch": 2.67, "grad_norm": 1.383278161948324, "learning_rate": 3.187368846075983e-07, "loss": 0.039, "step": 9771 }, { "epoch": 2.67, "grad_norm": 1.391997503570171, "learning_rate": 3.182193433372288e-07, "loss": 0.0437, "step": 9772 }, { "epoch": 2.67, "grad_norm": 1.633697649344028, "learning_rate": 3.1770220877033243e-07, "loss": 0.0527, "step": 9773 }, { "epoch": 2.67, "grad_norm": 1.5719644713419079, "learning_rate": 3.1718548095183153e-07, "loss": 0.0447, "step": 9774 }, { "epoch": 2.67, "grad_norm": 1.494395449206361, "learning_rate": 3.166691599266153e-07, "loss": 0.048, "step": 9775 }, { "epoch": 2.67, "grad_norm": 1.3734912339847678, "learning_rate": 3.161532457395355e-07, "loss": 0.0433, "step": 9776 }, { "epoch": 2.67, "grad_norm": 1.463169521929121, "learning_rate": 3.156377384354087e-07, "loss": 0.0453, "step": 9777 }, { "epoch": 2.67, "grad_norm": 1.4940975046453457, "learning_rate": 3.1512263805901667e-07, "loss": 0.0482, "step": 9778 }, { "epoch": 2.67, "grad_norm": 1.678748396347184, "learning_rate": 3.14607944655107e-07, "loss": 0.0549, "step": 9779 }, { "epoch": 2.67, "grad_norm": 1.2637093962025079, "learning_rate": 3.14093658268389e-07, "loss": 0.0344, "step": 9780 }, { "epoch": 2.67, "grad_norm": 1.6270888878882568, "learning_rate": 3.135797789435407e-07, "loss": 0.0462, "step": 9781 }, { "epoch": 2.67, "grad_norm": 1.21991156245253, "learning_rate": 3.1306630672520153e-07, "loss": 0.0319, "step": 9782 }, { "epoch": 2.67, "grad_norm": 1.4038695739936458, "learning_rate": 3.125532416579763e-07, "loss": 0.0406, "step": 9783 }, { "epoch": 2.67, "grad_norm": 1.3508710337807508, "learning_rate": 3.1204058378643375e-07, "loss": 0.0432, "step": 9784 }, { "epoch": 2.67, "grad_norm": 1.6502976609643316, "learning_rate": 3.115283331551089e-07, "loss": 0.0419, "step": 9785 }, { "epoch": 2.67, "grad_norm": 1.4305730020335734, "learning_rate": 3.1101648980850217e-07, "loss": 0.0452, "step": 9786 }, { "epoch": 2.67, "grad_norm": 1.5634464708578433, "learning_rate": 3.105050537910742e-07, "loss": 0.0461, "step": 9787 }, { "epoch": 2.67, "grad_norm": 1.4910660517319974, "learning_rate": 3.099940251472572e-07, "loss": 0.0403, "step": 9788 }, { "epoch": 2.67, "grad_norm": 1.6738891191621585, "learning_rate": 3.0948340392143897e-07, "loss": 0.0513, "step": 9789 }, { "epoch": 2.67, "grad_norm": 1.3986875170971205, "learning_rate": 3.0897319015798067e-07, "loss": 0.0378, "step": 9790 }, { "epoch": 2.67, "grad_norm": 1.5293050996106932, "learning_rate": 3.084633839012019e-07, "loss": 0.0484, "step": 9791 }, { "epoch": 2.67, "grad_norm": 1.5281477245803312, "learning_rate": 3.0795398519539113e-07, "loss": 0.043, "step": 9792 }, { "epoch": 2.67, "grad_norm": 1.6091043511257537, "learning_rate": 3.074449940847979e-07, "loss": 0.0533, "step": 9793 }, { "epoch": 2.67, "grad_norm": 1.5523465910460366, "learning_rate": 3.069364106136402e-07, "loss": 0.0475, "step": 9794 }, { "epoch": 2.67, "grad_norm": 1.699985764421929, "learning_rate": 3.0642823482609495e-07, "loss": 0.0489, "step": 9795 }, { "epoch": 2.67, "grad_norm": 1.6321301043488206, "learning_rate": 3.0592046676631015e-07, "loss": 0.0487, "step": 9796 }, { "epoch": 2.67, "grad_norm": 1.2115564049325827, "learning_rate": 3.054131064783933e-07, "loss": 0.04, "step": 9797 }, { "epoch": 2.67, "grad_norm": 1.3469935338261332, "learning_rate": 3.049061540064202e-07, "loss": 0.0427, "step": 9798 }, { "epoch": 2.68, "grad_norm": 1.3876714390383034, "learning_rate": 3.0439960939442794e-07, "loss": 0.0397, "step": 9799 }, { "epoch": 2.68, "grad_norm": 1.4282545431976708, "learning_rate": 3.038934726864218e-07, "loss": 0.034, "step": 9800 }, { "epoch": 2.68, "grad_norm": 1.6714890567533012, "learning_rate": 3.033877439263666e-07, "loss": 0.0488, "step": 9801 }, { "epoch": 2.68, "grad_norm": 1.4031503952263187, "learning_rate": 3.0288242315819724e-07, "loss": 0.0471, "step": 9802 }, { "epoch": 2.68, "grad_norm": 1.3463164412475201, "learning_rate": 3.0237751042580866e-07, "loss": 0.0388, "step": 9803 }, { "epoch": 2.68, "grad_norm": 1.6023658979704292, "learning_rate": 3.0187300577306456e-07, "loss": 0.0413, "step": 9804 }, { "epoch": 2.68, "grad_norm": 1.5198271549143991, "learning_rate": 3.013689092437888e-07, "loss": 0.0472, "step": 9805 }, { "epoch": 2.68, "grad_norm": 1.6933446982158806, "learning_rate": 3.0086522088177415e-07, "loss": 0.0474, "step": 9806 }, { "epoch": 2.68, "grad_norm": 1.465241027092162, "learning_rate": 3.003619407307734e-07, "loss": 0.042, "step": 9807 }, { "epoch": 2.68, "grad_norm": 1.2992516944884374, "learning_rate": 2.9985906883450765e-07, "loss": 0.0419, "step": 9808 }, { "epoch": 2.68, "grad_norm": 1.4912614738979613, "learning_rate": 2.9935660523665976e-07, "loss": 0.0436, "step": 9809 }, { "epoch": 2.68, "grad_norm": 1.383811868812435, "learning_rate": 2.988545499808804e-07, "loss": 0.0432, "step": 9810 }, { "epoch": 2.68, "grad_norm": 1.3341734929850606, "learning_rate": 2.9835290311078123e-07, "loss": 0.0387, "step": 9811 }, { "epoch": 2.68, "grad_norm": 1.3192136579361862, "learning_rate": 2.9785166466994195e-07, "loss": 0.0361, "step": 9812 }, { "epoch": 2.68, "grad_norm": 1.553888799852158, "learning_rate": 2.9735083470190164e-07, "loss": 0.0527, "step": 9813 }, { "epoch": 2.68, "grad_norm": 1.652181334845828, "learning_rate": 2.9685041325016983e-07, "loss": 0.0471, "step": 9814 }, { "epoch": 2.68, "grad_norm": 1.443994193885354, "learning_rate": 2.9635040035821627e-07, "loss": 0.0437, "step": 9815 }, { "epoch": 2.68, "grad_norm": 1.4326933168637934, "learning_rate": 2.9585079606947843e-07, "loss": 0.0422, "step": 9816 }, { "epoch": 2.68, "grad_norm": 1.6185615132232907, "learning_rate": 2.953516004273543e-07, "loss": 0.0471, "step": 9817 }, { "epoch": 2.68, "grad_norm": 1.3645734980620279, "learning_rate": 2.948528134752121e-07, "loss": 0.035, "step": 9818 }, { "epoch": 2.68, "grad_norm": 1.5018381755880716, "learning_rate": 2.943544352563771e-07, "loss": 0.0447, "step": 9819 }, { "epoch": 2.68, "grad_norm": 1.3158266951887627, "learning_rate": 2.938564658141463e-07, "loss": 0.0356, "step": 9820 }, { "epoch": 2.68, "grad_norm": 1.5306089743479394, "learning_rate": 2.933589051917757e-07, "loss": 0.046, "step": 9821 }, { "epoch": 2.68, "grad_norm": 1.5657788120774125, "learning_rate": 2.9286175343249015e-07, "loss": 0.0446, "step": 9822 }, { "epoch": 2.68, "grad_norm": 1.5756581173013304, "learning_rate": 2.9236501057947506e-07, "loss": 0.0426, "step": 9823 }, { "epoch": 2.68, "grad_norm": 1.545620882655996, "learning_rate": 2.918686766758844e-07, "loss": 0.0446, "step": 9824 }, { "epoch": 2.68, "grad_norm": 1.4705908053138845, "learning_rate": 2.913727517648318e-07, "loss": 0.0447, "step": 9825 }, { "epoch": 2.68, "grad_norm": 1.5773264693029911, "learning_rate": 2.908772358894002e-07, "loss": 0.0471, "step": 9826 }, { "epoch": 2.68, "grad_norm": 1.3602340679270009, "learning_rate": 2.903821290926329e-07, "loss": 0.0367, "step": 9827 }, { "epoch": 2.68, "grad_norm": 1.4474149091282598, "learning_rate": 2.898874314175415e-07, "loss": 0.0457, "step": 9828 }, { "epoch": 2.68, "grad_norm": 1.6105528413705201, "learning_rate": 2.8939314290709784e-07, "loss": 0.0465, "step": 9829 }, { "epoch": 2.68, "grad_norm": 1.2969374472911948, "learning_rate": 2.888992636042437e-07, "loss": 0.0345, "step": 9830 }, { "epoch": 2.68, "grad_norm": 1.6759926534803957, "learning_rate": 2.8840579355187803e-07, "loss": 0.0423, "step": 9831 }, { "epoch": 2.68, "grad_norm": 1.4730773547325942, "learning_rate": 2.87912732792871e-07, "loss": 0.0482, "step": 9832 }, { "epoch": 2.68, "grad_norm": 1.240197143859333, "learning_rate": 2.874200813700534e-07, "loss": 0.0345, "step": 9833 }, { "epoch": 2.68, "grad_norm": 1.5300998369846195, "learning_rate": 2.869278393262226e-07, "loss": 0.0487, "step": 9834 }, { "epoch": 2.68, "grad_norm": 1.6449191163018604, "learning_rate": 2.8643600670413773e-07, "loss": 0.047, "step": 9835 }, { "epoch": 2.69, "grad_norm": 1.365745756381367, "learning_rate": 2.8594458354652687e-07, "loss": 0.0391, "step": 9836 }, { "epoch": 2.69, "grad_norm": 1.4020974106830846, "learning_rate": 2.8545356989607587e-07, "loss": 0.0377, "step": 9837 }, { "epoch": 2.69, "grad_norm": 1.4993321914494795, "learning_rate": 2.849629657954417e-07, "loss": 0.0423, "step": 9838 }, { "epoch": 2.69, "grad_norm": 1.3176257290625997, "learning_rate": 2.8447277128724136e-07, "loss": 0.0396, "step": 9839 }, { "epoch": 2.69, "grad_norm": 1.5722986440128743, "learning_rate": 2.839829864140586e-07, "loss": 0.0505, "step": 9840 }, { "epoch": 2.69, "grad_norm": 1.3387003357080745, "learning_rate": 2.8349361121844056e-07, "loss": 0.0316, "step": 9841 }, { "epoch": 2.69, "grad_norm": 1.3888312025810354, "learning_rate": 2.8300464574289866e-07, "loss": 0.0473, "step": 9842 }, { "epoch": 2.69, "grad_norm": 1.7009623484664964, "learning_rate": 2.8251609002990844e-07, "loss": 0.05, "step": 9843 }, { "epoch": 2.69, "grad_norm": 1.309737893226172, "learning_rate": 2.82027944121912e-07, "loss": 0.0336, "step": 9844 }, { "epoch": 2.69, "grad_norm": 1.5967437592276221, "learning_rate": 2.815402080613122e-07, "loss": 0.0433, "step": 9845 }, { "epoch": 2.69, "grad_norm": 1.3585195529859975, "learning_rate": 2.810528818904812e-07, "loss": 0.036, "step": 9846 }, { "epoch": 2.69, "grad_norm": 1.4126556863302957, "learning_rate": 2.8056596565175067e-07, "loss": 0.0467, "step": 9847 }, { "epoch": 2.69, "grad_norm": 1.3086968936064312, "learning_rate": 2.80079459387419e-07, "loss": 0.0361, "step": 9848 }, { "epoch": 2.69, "grad_norm": 1.6818363077834921, "learning_rate": 2.7959336313974847e-07, "loss": 0.0504, "step": 9849 }, { "epoch": 2.69, "grad_norm": 1.5029960459553915, "learning_rate": 2.7910767695096707e-07, "loss": 0.0474, "step": 9850 }, { "epoch": 2.69, "grad_norm": 1.3907806419948412, "learning_rate": 2.7862240086326486e-07, "loss": 0.0435, "step": 9851 }, { "epoch": 2.69, "grad_norm": 1.452709734046454, "learning_rate": 2.781375349187987e-07, "loss": 0.0409, "step": 9852 }, { "epoch": 2.69, "grad_norm": 1.7429092616502795, "learning_rate": 2.7765307915968763e-07, "loss": 0.0567, "step": 9853 }, { "epoch": 2.69, "grad_norm": 1.705903219762938, "learning_rate": 2.771690336280164e-07, "loss": 0.0556, "step": 9854 }, { "epoch": 2.69, "grad_norm": 1.7142335408285376, "learning_rate": 2.7668539836583295e-07, "loss": 0.0466, "step": 9855 }, { "epoch": 2.69, "grad_norm": 1.637738099032581, "learning_rate": 2.762021734151521e-07, "loss": 0.0482, "step": 9856 }, { "epoch": 2.69, "grad_norm": 1.3529093302066764, "learning_rate": 2.7571935881794963e-07, "loss": 0.0359, "step": 9857 }, { "epoch": 2.69, "grad_norm": 1.5529970753734432, "learning_rate": 2.7523695461616875e-07, "loss": 0.0432, "step": 9858 }, { "epoch": 2.69, "grad_norm": 1.707764222059563, "learning_rate": 2.747549608517147e-07, "loss": 0.0516, "step": 9859 }, { "epoch": 2.69, "grad_norm": 1.4463722368483976, "learning_rate": 2.74273377566458e-07, "loss": 0.0423, "step": 9860 }, { "epoch": 2.69, "grad_norm": 1.349447549362689, "learning_rate": 2.7379220480223345e-07, "loss": 0.0442, "step": 9861 }, { "epoch": 2.69, "grad_norm": 1.5103472549282548, "learning_rate": 2.7331144260084096e-07, "loss": 0.0402, "step": 9862 }, { "epoch": 2.69, "grad_norm": 1.2552170688503217, "learning_rate": 2.7283109100404323e-07, "loss": 0.0355, "step": 9863 }, { "epoch": 2.69, "grad_norm": 1.6171469551718765, "learning_rate": 2.7235115005356913e-07, "loss": 0.0444, "step": 9864 }, { "epoch": 2.69, "grad_norm": 1.5979316493374738, "learning_rate": 2.718716197911098e-07, "loss": 0.0417, "step": 9865 }, { "epoch": 2.69, "grad_norm": 1.6841265472287308, "learning_rate": 2.713925002583223e-07, "loss": 0.0459, "step": 9866 }, { "epoch": 2.69, "grad_norm": 1.65455171703717, "learning_rate": 2.7091379149682683e-07, "loss": 0.0518, "step": 9867 }, { "epoch": 2.69, "grad_norm": 1.6998033465079585, "learning_rate": 2.704354935482095e-07, "loss": 0.0467, "step": 9868 }, { "epoch": 2.69, "grad_norm": 1.593785477475419, "learning_rate": 2.699576064540188e-07, "loss": 0.0432, "step": 9869 }, { "epoch": 2.69, "grad_norm": 1.4622081571625656, "learning_rate": 2.6948013025576927e-07, "loss": 0.0433, "step": 9870 }, { "epoch": 2.69, "grad_norm": 1.562749547860653, "learning_rate": 2.6900306499493875e-07, "loss": 0.0485, "step": 9871 }, { "epoch": 2.7, "grad_norm": 1.7302400372988471, "learning_rate": 2.685264107129698e-07, "loss": 0.0422, "step": 9872 }, { "epoch": 2.7, "grad_norm": 1.4753895557590084, "learning_rate": 2.680501674512681e-07, "loss": 0.0415, "step": 9873 }, { "epoch": 2.7, "grad_norm": 1.5588022569303843, "learning_rate": 2.675743352512061e-07, "loss": 0.0484, "step": 9874 }, { "epoch": 2.7, "grad_norm": 2.0242744658091154, "learning_rate": 2.6709891415411747e-07, "loss": 0.048, "step": 9875 }, { "epoch": 2.7, "grad_norm": 1.5001127430636434, "learning_rate": 2.66623904201303e-07, "loss": 0.0453, "step": 9876 }, { "epoch": 2.7, "grad_norm": 1.4175396211241404, "learning_rate": 2.661493054340264e-07, "loss": 0.0398, "step": 9877 }, { "epoch": 2.7, "grad_norm": 1.5714798136902872, "learning_rate": 2.656751178935146e-07, "loss": 0.0461, "step": 9878 }, { "epoch": 2.7, "grad_norm": 1.5532353574468123, "learning_rate": 2.65201341620962e-07, "loss": 0.0423, "step": 9879 }, { "epoch": 2.7, "grad_norm": 1.5500560452724461, "learning_rate": 2.647279766575228e-07, "loss": 0.0442, "step": 9880 }, { "epoch": 2.7, "grad_norm": 1.2652048926676074, "learning_rate": 2.6425502304432027e-07, "loss": 0.037, "step": 9881 }, { "epoch": 2.7, "grad_norm": 1.5625283143804822, "learning_rate": 2.637824808224382e-07, "loss": 0.0475, "step": 9882 }, { "epoch": 2.7, "grad_norm": 1.2854026331583217, "learning_rate": 2.633103500329276e-07, "loss": 0.0333, "step": 9883 }, { "epoch": 2.7, "grad_norm": 1.4775010185234378, "learning_rate": 2.628386307167996e-07, "loss": 0.0426, "step": 9884 }, { "epoch": 2.7, "grad_norm": 1.3513125354332844, "learning_rate": 2.623673229150342e-07, "loss": 0.0368, "step": 9885 }, { "epoch": 2.7, "grad_norm": 1.3130380737818335, "learning_rate": 2.618964266685725e-07, "loss": 0.0415, "step": 9886 }, { "epoch": 2.7, "grad_norm": 1.5634218143317626, "learning_rate": 2.6142594201832183e-07, "loss": 0.0432, "step": 9887 }, { "epoch": 2.7, "grad_norm": 1.5746765441141195, "learning_rate": 2.6095586900515226e-07, "loss": 0.0448, "step": 9888 }, { "epoch": 2.7, "grad_norm": 1.4473319624113612, "learning_rate": 2.604862076699005e-07, "loss": 0.0455, "step": 9889 }, { "epoch": 2.7, "grad_norm": 1.2966509031495084, "learning_rate": 2.600169580533629e-07, "loss": 0.0354, "step": 9890 }, { "epoch": 2.7, "grad_norm": 1.4992169509356985, "learning_rate": 2.5954812019630515e-07, "loss": 0.0473, "step": 9891 }, { "epoch": 2.7, "grad_norm": 1.3649809612611266, "learning_rate": 2.59079694139453e-07, "loss": 0.0443, "step": 9892 }, { "epoch": 2.7, "grad_norm": 1.388705188417682, "learning_rate": 2.5861167992350055e-07, "loss": 0.0425, "step": 9893 }, { "epoch": 2.7, "grad_norm": 1.3531723257502692, "learning_rate": 2.5814407758910144e-07, "loss": 0.0397, "step": 9894 }, { "epoch": 2.7, "grad_norm": 1.3001448137766514, "learning_rate": 2.576768871768792e-07, "loss": 0.0395, "step": 9895 }, { "epoch": 2.7, "grad_norm": 1.3223272335887373, "learning_rate": 2.5721010872741536e-07, "loss": 0.0414, "step": 9896 }, { "epoch": 2.7, "grad_norm": 1.4408639068099598, "learning_rate": 2.567437422812602e-07, "loss": 0.0493, "step": 9897 }, { "epoch": 2.7, "grad_norm": 1.72025937387094, "learning_rate": 2.562777878789258e-07, "loss": 0.0538, "step": 9898 }, { "epoch": 2.7, "grad_norm": 1.3346530631338769, "learning_rate": 2.5581224556089024e-07, "loss": 0.0368, "step": 9899 }, { "epoch": 2.7, "grad_norm": 1.5658326787347971, "learning_rate": 2.55347115367594e-07, "loss": 0.046, "step": 9900 }, { "epoch": 2.7, "grad_norm": 1.4959811682537567, "learning_rate": 2.548823973394449e-07, "loss": 0.045, "step": 9901 }, { "epoch": 2.7, "grad_norm": 1.5733053667046126, "learning_rate": 2.544180915168093e-07, "loss": 0.0464, "step": 9902 }, { "epoch": 2.7, "grad_norm": 1.654203123008192, "learning_rate": 2.539541979400234e-07, "loss": 0.0456, "step": 9903 }, { "epoch": 2.7, "grad_norm": 1.3246762189597465, "learning_rate": 2.534907166493844e-07, "loss": 0.0389, "step": 9904 }, { "epoch": 2.7, "grad_norm": 1.5533663189290856, "learning_rate": 2.530276476851562e-07, "loss": 0.0416, "step": 9905 }, { "epoch": 2.7, "grad_norm": 1.2753222478415875, "learning_rate": 2.525649910875627e-07, "loss": 0.0397, "step": 9906 }, { "epoch": 2.7, "grad_norm": 1.483551393887903, "learning_rate": 2.5210274689679793e-07, "loss": 0.0381, "step": 9907 }, { "epoch": 2.7, "grad_norm": 1.3708261780449744, "learning_rate": 2.5164091515301357e-07, "loss": 0.0393, "step": 9908 }, { "epoch": 2.71, "grad_norm": 1.410171594673024, "learning_rate": 2.511794958963309e-07, "loss": 0.032, "step": 9909 }, { "epoch": 2.71, "grad_norm": 1.363809843280214, "learning_rate": 2.507184891668313e-07, "loss": 0.0394, "step": 9910 }, { "epoch": 2.71, "grad_norm": 1.5508238729801465, "learning_rate": 2.502578950045642e-07, "loss": 0.0484, "step": 9911 }, { "epoch": 2.71, "grad_norm": 1.2011950846821335, "learning_rate": 2.4979771344953885e-07, "loss": 0.0392, "step": 9912 }, { "epoch": 2.71, "grad_norm": 1.4010435126518643, "learning_rate": 2.493379445417338e-07, "loss": 0.0383, "step": 9913 }, { "epoch": 2.71, "grad_norm": 1.3276900789666153, "learning_rate": 2.488785883210859e-07, "loss": 0.0376, "step": 9914 }, { "epoch": 2.71, "grad_norm": 1.4916243828279114, "learning_rate": 2.4841964482750114e-07, "loss": 0.0465, "step": 9915 }, { "epoch": 2.71, "grad_norm": 1.357624878923833, "learning_rate": 2.479611141008459e-07, "loss": 0.0425, "step": 9916 }, { "epoch": 2.71, "grad_norm": 1.3952131153832825, "learning_rate": 2.4750299618095496e-07, "loss": 0.0465, "step": 9917 }, { "epoch": 2.71, "grad_norm": 1.6395275475593913, "learning_rate": 2.470452911076227e-07, "loss": 0.0427, "step": 9918 }, { "epoch": 2.71, "grad_norm": 1.6684740405017038, "learning_rate": 2.46587998920611e-07, "loss": 0.047, "step": 9919 }, { "epoch": 2.71, "grad_norm": 1.423408571455933, "learning_rate": 2.461311196596433e-07, "loss": 0.0419, "step": 9920 }, { "epoch": 2.71, "grad_norm": 1.3668621474877518, "learning_rate": 2.4567465336440945e-07, "loss": 0.0403, "step": 9921 }, { "epoch": 2.71, "grad_norm": 1.519298234099257, "learning_rate": 2.4521860007456153e-07, "loss": 0.045, "step": 9922 }, { "epoch": 2.71, "grad_norm": 1.434028095497877, "learning_rate": 2.4476295982971744e-07, "loss": 0.039, "step": 9923 }, { "epoch": 2.71, "grad_norm": 1.6306078409431466, "learning_rate": 2.443077326694582e-07, "loss": 0.0487, "step": 9924 }, { "epoch": 2.71, "grad_norm": 1.2544989117559147, "learning_rate": 2.438529186333288e-07, "loss": 0.0389, "step": 9925 }, { "epoch": 2.71, "grad_norm": 1.3657880574944892, "learning_rate": 2.4339851776083833e-07, "loss": 0.0394, "step": 9926 }, { "epoch": 2.71, "grad_norm": 1.4559968811636415, "learning_rate": 2.4294453009146124e-07, "loss": 0.0413, "step": 9927 }, { "epoch": 2.71, "grad_norm": 1.2071863618966017, "learning_rate": 2.424909556646343e-07, "loss": 0.0351, "step": 9928 }, { "epoch": 2.71, "grad_norm": 1.4972927043943733, "learning_rate": 2.4203779451975996e-07, "loss": 0.0453, "step": 9929 }, { "epoch": 2.71, "grad_norm": 1.687264033521166, "learning_rate": 2.415850466962044e-07, "loss": 0.0482, "step": 9930 }, { "epoch": 2.71, "grad_norm": 1.5271501050292777, "learning_rate": 2.4113271223329625e-07, "loss": 0.0469, "step": 9931 }, { "epoch": 2.71, "grad_norm": 1.6565599146316645, "learning_rate": 2.4068079117033014e-07, "loss": 0.0489, "step": 9932 }, { "epoch": 2.71, "grad_norm": 1.5123902046640314, "learning_rate": 2.402292835465647e-07, "loss": 0.0473, "step": 9933 }, { "epoch": 2.71, "grad_norm": 1.6855982618787908, "learning_rate": 2.3977818940122076e-07, "loss": 0.0479, "step": 9934 }, { "epoch": 2.71, "grad_norm": 1.6928475087006483, "learning_rate": 2.393275087734864e-07, "loss": 0.0539, "step": 9935 }, { "epoch": 2.71, "grad_norm": 1.3913679248040156, "learning_rate": 2.3887724170251094e-07, "loss": 0.0446, "step": 9936 }, { "epoch": 2.71, "grad_norm": 1.3818552292966644, "learning_rate": 2.384273882274091e-07, "loss": 0.0443, "step": 9937 }, { "epoch": 2.71, "grad_norm": 1.5723582684192356, "learning_rate": 2.3797794838725853e-07, "loss": 0.0481, "step": 9938 }, { "epoch": 2.71, "grad_norm": 1.435776775141003, "learning_rate": 2.37528922221103e-07, "loss": 0.0465, "step": 9939 }, { "epoch": 2.71, "grad_norm": 1.3581773442951404, "learning_rate": 2.370803097679486e-07, "loss": 0.0406, "step": 9940 }, { "epoch": 2.71, "grad_norm": 1.7031672105371836, "learning_rate": 2.3663211106676632e-07, "loss": 0.0524, "step": 9941 }, { "epoch": 2.71, "grad_norm": 1.2076553577210025, "learning_rate": 2.3618432615649057e-07, "loss": 0.0346, "step": 9942 }, { "epoch": 2.71, "grad_norm": 1.5094552801578587, "learning_rate": 2.3573695507602024e-07, "loss": 0.0435, "step": 9943 }, { "epoch": 2.71, "grad_norm": 1.3999864830432405, "learning_rate": 2.3528999786421758e-07, "loss": 0.0451, "step": 9944 }, { "epoch": 2.71, "grad_norm": 1.6972659242623396, "learning_rate": 2.3484345455991042e-07, "loss": 0.0469, "step": 9945 }, { "epoch": 2.72, "grad_norm": 1.3549589868860779, "learning_rate": 2.343973252018894e-07, "loss": 0.0372, "step": 9946 }, { "epoch": 2.72, "grad_norm": 1.2106799489140474, "learning_rate": 2.3395160982890963e-07, "loss": 0.0365, "step": 9947 }, { "epoch": 2.72, "grad_norm": 1.3687189059977076, "learning_rate": 2.335063084796907e-07, "loss": 0.0394, "step": 9948 }, { "epoch": 2.72, "grad_norm": 1.4694079445240011, "learning_rate": 2.3306142119291442e-07, "loss": 0.0395, "step": 9949 }, { "epoch": 2.72, "grad_norm": 1.5471581548787146, "learning_rate": 2.3261694800722767e-07, "loss": 0.0468, "step": 9950 }, { "epoch": 2.72, "grad_norm": 1.4013040981637193, "learning_rate": 2.3217288896124347e-07, "loss": 0.0427, "step": 9951 }, { "epoch": 2.72, "grad_norm": 1.3468518205794058, "learning_rate": 2.317292440935348e-07, "loss": 0.0407, "step": 9952 }, { "epoch": 2.72, "grad_norm": 1.3354133139502393, "learning_rate": 2.3128601344264257e-07, "loss": 0.0443, "step": 9953 }, { "epoch": 2.72, "grad_norm": 1.5329044954697, "learning_rate": 2.3084319704706925e-07, "loss": 0.0405, "step": 9954 }, { "epoch": 2.72, "grad_norm": 1.6971678156240182, "learning_rate": 2.3040079494528244e-07, "loss": 0.0489, "step": 9955 }, { "epoch": 2.72, "grad_norm": 1.3771926469195823, "learning_rate": 2.2995880717571195e-07, "loss": 0.0385, "step": 9956 }, { "epoch": 2.72, "grad_norm": 1.4183147383098516, "learning_rate": 2.2951723377675484e-07, "loss": 0.0397, "step": 9957 }, { "epoch": 2.72, "grad_norm": 1.5905894433028311, "learning_rate": 2.2907607478676818e-07, "loss": 0.0459, "step": 9958 }, { "epoch": 2.72, "grad_norm": 1.7465748885705494, "learning_rate": 2.28635330244078e-07, "loss": 0.045, "step": 9959 }, { "epoch": 2.72, "grad_norm": 1.4416913307087156, "learning_rate": 2.2819500018696927e-07, "loss": 0.042, "step": 9960 }, { "epoch": 2.72, "grad_norm": 1.878288101385799, "learning_rate": 2.277550846536941e-07, "loss": 0.0431, "step": 9961 }, { "epoch": 2.72, "grad_norm": 1.62103140011818, "learning_rate": 2.2731558368246698e-07, "loss": 0.0456, "step": 9962 }, { "epoch": 2.72, "grad_norm": 1.3686837715792661, "learning_rate": 2.2687649731146844e-07, "loss": 0.043, "step": 9963 }, { "epoch": 2.72, "grad_norm": 1.5648244152736621, "learning_rate": 2.264378255788402e-07, "loss": 0.0508, "step": 9964 }, { "epoch": 2.72, "grad_norm": 1.498949245338006, "learning_rate": 2.2599956852269067e-07, "loss": 0.0523, "step": 9965 }, { "epoch": 2.72, "grad_norm": 1.7187470608975022, "learning_rate": 2.2556172618108996e-07, "loss": 0.0406, "step": 9966 }, { "epoch": 2.72, "grad_norm": 1.3217549152220947, "learning_rate": 2.2512429859207375e-07, "loss": 0.0389, "step": 9967 }, { "epoch": 2.72, "grad_norm": 1.5951556901078363, "learning_rate": 2.2468728579363997e-07, "loss": 0.0426, "step": 9968 }, { "epoch": 2.72, "grad_norm": 1.2124102859218526, "learning_rate": 2.242506878237538e-07, "loss": 0.0292, "step": 9969 }, { "epoch": 2.72, "grad_norm": 1.7050345942832428, "learning_rate": 2.2381450472033995e-07, "loss": 0.0485, "step": 9970 }, { "epoch": 2.72, "grad_norm": 1.4818539513574767, "learning_rate": 2.2337873652129084e-07, "loss": 0.039, "step": 9971 }, { "epoch": 2.72, "grad_norm": 1.3533995033261557, "learning_rate": 2.229433832644623e-07, "loss": 0.0407, "step": 9972 }, { "epoch": 2.72, "grad_norm": 1.6947179935281724, "learning_rate": 2.2250844498767077e-07, "loss": 0.0438, "step": 9973 }, { "epoch": 2.72, "grad_norm": 1.5884973453111142, "learning_rate": 2.2207392172870047e-07, "loss": 0.0462, "step": 9974 }, { "epoch": 2.72, "grad_norm": 1.1457296533193817, "learning_rate": 2.2163981352529728e-07, "loss": 0.034, "step": 9975 }, { "epoch": 2.72, "grad_norm": 1.4100688335896259, "learning_rate": 2.2120612041517387e-07, "loss": 0.0426, "step": 9976 }, { "epoch": 2.72, "grad_norm": 1.8737929615612752, "learning_rate": 2.2077284243600227e-07, "loss": 0.0498, "step": 9977 }, { "epoch": 2.72, "grad_norm": 1.469506257465614, "learning_rate": 2.203399796254241e-07, "loss": 0.0522, "step": 9978 }, { "epoch": 2.72, "grad_norm": 1.421875687400047, "learning_rate": 2.199075320210392e-07, "loss": 0.0427, "step": 9979 }, { "epoch": 2.72, "grad_norm": 1.427953189732144, "learning_rate": 2.1947549966041537e-07, "loss": 0.0439, "step": 9980 }, { "epoch": 2.72, "grad_norm": 1.3106523566682518, "learning_rate": 2.19043882581082e-07, "loss": 0.0411, "step": 9981 }, { "epoch": 2.73, "grad_norm": 1.377868047063109, "learning_rate": 2.1861268082053466e-07, "loss": 0.0454, "step": 9982 }, { "epoch": 2.73, "grad_norm": 1.53894493699053, "learning_rate": 2.1818189441623061e-07, "loss": 0.0437, "step": 9983 }, { "epoch": 2.73, "grad_norm": 1.2742909729142209, "learning_rate": 2.1775152340559325e-07, "loss": 0.0412, "step": 9984 }, { "epoch": 2.73, "grad_norm": 1.3057018754904868, "learning_rate": 2.173215678260071e-07, "loss": 0.0391, "step": 9985 }, { "epoch": 2.73, "grad_norm": 1.6073207467725237, "learning_rate": 2.1689202771482344e-07, "loss": 0.0472, "step": 9986 }, { "epoch": 2.73, "grad_norm": 1.491532424567738, "learning_rate": 2.164629031093546e-07, "loss": 0.0415, "step": 9987 }, { "epoch": 2.73, "grad_norm": 1.4890768238619554, "learning_rate": 2.160341940468802e-07, "loss": 0.0352, "step": 9988 }, { "epoch": 2.73, "grad_norm": 1.286049031602354, "learning_rate": 2.156059005646405e-07, "loss": 0.0357, "step": 9989 }, { "epoch": 2.73, "grad_norm": 1.5091075252335213, "learning_rate": 2.151780226998429e-07, "loss": 0.0506, "step": 9990 }, { "epoch": 2.73, "grad_norm": 1.3155509652951816, "learning_rate": 2.1475056048965437e-07, "loss": 0.0392, "step": 9991 }, { "epoch": 2.73, "grad_norm": 1.4985696667725794, "learning_rate": 2.1432351397121021e-07, "loss": 0.0407, "step": 9992 }, { "epoch": 2.73, "grad_norm": 1.2810530446134696, "learning_rate": 2.1389688318160683e-07, "loss": 0.0432, "step": 9993 }, { "epoch": 2.73, "grad_norm": 1.3036727045533048, "learning_rate": 2.1347066815790574e-07, "loss": 0.0373, "step": 9994 }, { "epoch": 2.73, "grad_norm": 1.385305085361356, "learning_rate": 2.1304486893713172e-07, "loss": 0.0457, "step": 9995 }, { "epoch": 2.73, "grad_norm": 1.4327354350419996, "learning_rate": 2.1261948555627464e-07, "loss": 0.0434, "step": 9996 }, { "epoch": 2.73, "grad_norm": 1.529576686493087, "learning_rate": 2.1219451805228607e-07, "loss": 0.0469, "step": 9997 }, { "epoch": 2.73, "grad_norm": 1.5254670769013934, "learning_rate": 2.1176996646208313e-07, "loss": 0.0394, "step": 9998 }, { "epoch": 2.73, "grad_norm": 1.577544313339664, "learning_rate": 2.113458308225458e-07, "loss": 0.053, "step": 9999 }, { "epoch": 2.73, "grad_norm": 1.3262685690873348, "learning_rate": 2.109221111705201e-07, "loss": 0.0423, "step": 10000 }, { "epoch": 2.73, "grad_norm": 1.4070531625072271, "learning_rate": 2.104988075428127e-07, "loss": 0.0343, "step": 10001 }, { "epoch": 2.73, "grad_norm": 1.3782776834064139, "learning_rate": 2.1007591997619703e-07, "loss": 0.0419, "step": 10002 }, { "epoch": 2.73, "grad_norm": 1.4366752438404735, "learning_rate": 2.0965344850740698e-07, "loss": 0.0357, "step": 10003 }, { "epoch": 2.73, "grad_norm": 1.5937770558453033, "learning_rate": 2.092313931731449e-07, "loss": 0.0446, "step": 10004 }, { "epoch": 2.73, "grad_norm": 1.8474194095435017, "learning_rate": 2.0880975401007253e-07, "loss": 0.0484, "step": 10005 }, { "epoch": 2.73, "grad_norm": 1.616401014842261, "learning_rate": 2.0838853105481838e-07, "loss": 0.0466, "step": 10006 }, { "epoch": 2.73, "grad_norm": 1.5626912153334243, "learning_rate": 2.079677243439743e-07, "loss": 0.0478, "step": 10007 }, { "epoch": 2.73, "grad_norm": 1.4043662609913272, "learning_rate": 2.0754733391409486e-07, "loss": 0.0483, "step": 10008 }, { "epoch": 2.73, "grad_norm": 1.4271267332321544, "learning_rate": 2.0712735980169819e-07, "loss": 0.0409, "step": 10009 }, { "epoch": 2.73, "grad_norm": 1.317669235773752, "learning_rate": 2.067078020432689e-07, "loss": 0.0398, "step": 10010 }, { "epoch": 2.73, "grad_norm": 1.2652711012815854, "learning_rate": 2.0628866067525288e-07, "loss": 0.0362, "step": 10011 }, { "epoch": 2.73, "grad_norm": 1.2568785258001451, "learning_rate": 2.05869935734061e-07, "loss": 0.0405, "step": 10012 }, { "epoch": 2.73, "grad_norm": 1.4448691240660754, "learning_rate": 2.0545162725606693e-07, "loss": 0.039, "step": 10013 }, { "epoch": 2.73, "grad_norm": 1.6647068135034264, "learning_rate": 2.0503373527760994e-07, "loss": 0.0512, "step": 10014 }, { "epoch": 2.73, "grad_norm": 1.4890586533894707, "learning_rate": 2.04616259834991e-07, "loss": 0.042, "step": 10015 }, { "epoch": 2.73, "grad_norm": 1.5012071702584902, "learning_rate": 2.0419920096447666e-07, "loss": 0.0415, "step": 10016 }, { "epoch": 2.73, "grad_norm": 1.4980712082252385, "learning_rate": 2.0378255870229625e-07, "loss": 0.0531, "step": 10017 }, { "epoch": 2.73, "grad_norm": 1.7202242107131056, "learning_rate": 2.033663330846436e-07, "loss": 0.0493, "step": 10018 }, { "epoch": 2.74, "grad_norm": 1.7811727008363314, "learning_rate": 2.0295052414767535e-07, "loss": 0.0449, "step": 10019 }, { "epoch": 2.74, "grad_norm": 1.2868436309825144, "learning_rate": 2.0253513192751374e-07, "loss": 0.035, "step": 10020 }, { "epoch": 2.74, "grad_norm": 1.6101278480111838, "learning_rate": 2.0212015646024152e-07, "loss": 0.0493, "step": 10021 }, { "epoch": 2.74, "grad_norm": 1.595526022662632, "learning_rate": 2.017055977819099e-07, "loss": 0.0502, "step": 10022 }, { "epoch": 2.74, "grad_norm": 1.654964965433621, "learning_rate": 2.0129145592852893e-07, "loss": 0.0485, "step": 10023 }, { "epoch": 2.74, "grad_norm": 1.5614816234156104, "learning_rate": 2.008777309360771e-07, "loss": 0.0535, "step": 10024 }, { "epoch": 2.74, "grad_norm": 1.50195589348083, "learning_rate": 2.0046442284049339e-07, "loss": 0.0426, "step": 10025 }, { "epoch": 2.74, "grad_norm": 1.3773806432243265, "learning_rate": 2.0005153167768133e-07, "loss": 0.043, "step": 10026 }, { "epoch": 2.74, "grad_norm": 1.4055825170233591, "learning_rate": 1.9963905748350888e-07, "loss": 0.0455, "step": 10027 }, { "epoch": 2.74, "grad_norm": 1.2510483783281026, "learning_rate": 1.9922700029380737e-07, "loss": 0.0335, "step": 10028 }, { "epoch": 2.74, "grad_norm": 1.4439127659282065, "learning_rate": 1.9881536014437153e-07, "loss": 0.0479, "step": 10029 }, { "epoch": 2.74, "grad_norm": 1.3495244636676933, "learning_rate": 1.9840413707096162e-07, "loss": 0.0383, "step": 10030 }, { "epoch": 2.74, "grad_norm": 1.330462975670874, "learning_rate": 1.9799333110929907e-07, "loss": 0.0462, "step": 10031 }, { "epoch": 2.74, "grad_norm": 1.4918709077584789, "learning_rate": 1.9758294229507092e-07, "loss": 0.0522, "step": 10032 }, { "epoch": 2.74, "grad_norm": 1.340969991679822, "learning_rate": 1.9717297066392638e-07, "loss": 0.0416, "step": 10033 }, { "epoch": 2.74, "grad_norm": 1.3934030593697044, "learning_rate": 1.9676341625148144e-07, "loss": 0.0311, "step": 10034 }, { "epoch": 2.74, "grad_norm": 1.4321468062799512, "learning_rate": 1.963542790933115e-07, "loss": 0.0398, "step": 10035 }, { "epoch": 2.74, "grad_norm": 1.3765965114976613, "learning_rate": 1.959455592249604e-07, "loss": 0.0386, "step": 10036 }, { "epoch": 2.74, "grad_norm": 1.4293559334957437, "learning_rate": 1.9553725668193192e-07, "loss": 0.041, "step": 10037 }, { "epoch": 2.74, "grad_norm": 1.4517457291569, "learning_rate": 1.9512937149969546e-07, "loss": 0.0388, "step": 10038 }, { "epoch": 2.74, "grad_norm": 1.2496325312482004, "learning_rate": 1.947219037136827e-07, "loss": 0.0359, "step": 10039 }, { "epoch": 2.74, "grad_norm": 1.2256996888267322, "learning_rate": 1.94314853359292e-07, "loss": 0.0368, "step": 10040 }, { "epoch": 2.74, "grad_norm": 1.6438257390620437, "learning_rate": 1.939082204718823e-07, "loss": 0.0524, "step": 10041 }, { "epoch": 2.74, "grad_norm": 1.4835166261340051, "learning_rate": 1.9350200508677863e-07, "loss": 0.044, "step": 10042 }, { "epoch": 2.74, "grad_norm": 1.5909502228181926, "learning_rate": 1.9309620723926725e-07, "loss": 0.0513, "step": 10043 }, { "epoch": 2.74, "grad_norm": 1.3909438929297275, "learning_rate": 1.9269082696460106e-07, "loss": 0.0443, "step": 10044 }, { "epoch": 2.74, "grad_norm": 1.669266724077017, "learning_rate": 1.9228586429799356e-07, "loss": 0.0457, "step": 10045 }, { "epoch": 2.74, "grad_norm": 1.6168068068434518, "learning_rate": 1.9188131927462493e-07, "loss": 0.0548, "step": 10046 }, { "epoch": 2.74, "grad_norm": 1.5149264983368511, "learning_rate": 1.9147719192963655e-07, "loss": 0.0442, "step": 10047 }, { "epoch": 2.74, "grad_norm": 1.5974634272137342, "learning_rate": 1.910734822981364e-07, "loss": 0.0441, "step": 10048 }, { "epoch": 2.74, "grad_norm": 1.4004483993784096, "learning_rate": 1.9067019041519363e-07, "loss": 0.0402, "step": 10049 }, { "epoch": 2.74, "grad_norm": 1.1847403296842942, "learning_rate": 1.9026731631584194e-07, "loss": 0.0333, "step": 10050 }, { "epoch": 2.74, "grad_norm": 1.5837068412913442, "learning_rate": 1.8986486003507776e-07, "loss": 0.048, "step": 10051 }, { "epoch": 2.74, "grad_norm": 1.5652989833217095, "learning_rate": 1.8946282160786421e-07, "loss": 0.0497, "step": 10052 }, { "epoch": 2.74, "grad_norm": 1.281581513093705, "learning_rate": 1.8906120106912452e-07, "loss": 0.0379, "step": 10053 }, { "epoch": 2.74, "grad_norm": 1.410325844236411, "learning_rate": 1.8865999845374794e-07, "loss": 0.0445, "step": 10054 }, { "epoch": 2.75, "grad_norm": 1.3005625137015098, "learning_rate": 1.8825921379658718e-07, "loss": 0.0404, "step": 10055 }, { "epoch": 2.75, "grad_norm": 1.511000452274161, "learning_rate": 1.8785884713245718e-07, "loss": 0.0453, "step": 10056 }, { "epoch": 2.75, "grad_norm": 1.2323975640121196, "learning_rate": 1.8745889849613786e-07, "loss": 0.0351, "step": 10057 }, { "epoch": 2.75, "grad_norm": 1.517902785221159, "learning_rate": 1.8705936792237255e-07, "loss": 0.0508, "step": 10058 }, { "epoch": 2.75, "grad_norm": 1.4283797107413627, "learning_rate": 1.8666025544586796e-07, "loss": 0.0391, "step": 10059 }, { "epoch": 2.75, "grad_norm": 1.745277829113621, "learning_rate": 1.862615611012958e-07, "loss": 0.0517, "step": 10060 }, { "epoch": 2.75, "grad_norm": 1.289379322109334, "learning_rate": 1.8586328492328942e-07, "loss": 0.0417, "step": 10061 }, { "epoch": 2.75, "grad_norm": 1.3214675718049216, "learning_rate": 1.854654269464473e-07, "loss": 0.0329, "step": 10062 }, { "epoch": 2.75, "grad_norm": 1.440840097146051, "learning_rate": 1.8506798720533014e-07, "loss": 0.0402, "step": 10063 }, { "epoch": 2.75, "grad_norm": 1.5255387496552482, "learning_rate": 1.8467096573446418e-07, "loss": 0.0429, "step": 10064 }, { "epoch": 2.75, "grad_norm": 1.4651518674310213, "learning_rate": 1.8427436256833853e-07, "loss": 0.0393, "step": 10065 }, { "epoch": 2.75, "grad_norm": 1.2504394114205686, "learning_rate": 1.838781777414056e-07, "loss": 0.0384, "step": 10066 }, { "epoch": 2.75, "grad_norm": 1.3723061421069038, "learning_rate": 1.8348241128808285e-07, "loss": 0.0424, "step": 10067 }, { "epoch": 2.75, "grad_norm": 1.2943048882035808, "learning_rate": 1.8308706324274783e-07, "loss": 0.0381, "step": 10068 }, { "epoch": 2.75, "grad_norm": 1.269121210348574, "learning_rate": 1.8269213363974637e-07, "loss": 0.0337, "step": 10069 }, { "epoch": 2.75, "grad_norm": 1.645592678855142, "learning_rate": 1.822976225133838e-07, "loss": 0.0471, "step": 10070 }, { "epoch": 2.75, "grad_norm": 1.7757937018933823, "learning_rate": 1.8190352989793325e-07, "loss": 0.0601, "step": 10071 }, { "epoch": 2.75, "grad_norm": 1.494229272229063, "learning_rate": 1.8150985582762792e-07, "loss": 0.0424, "step": 10072 }, { "epoch": 2.75, "grad_norm": 1.2880897944663652, "learning_rate": 1.8111660033666767e-07, "loss": 0.039, "step": 10073 }, { "epoch": 2.75, "grad_norm": 1.515622071739428, "learning_rate": 1.8072376345921127e-07, "loss": 0.0432, "step": 10074 }, { "epoch": 2.75, "grad_norm": 1.4881606835057053, "learning_rate": 1.8033134522938701e-07, "loss": 0.0374, "step": 10075 }, { "epoch": 2.75, "grad_norm": 1.5407343913258063, "learning_rate": 1.7993934568128256e-07, "loss": 0.0453, "step": 10076 }, { "epoch": 2.75, "grad_norm": 1.4138531751618018, "learning_rate": 1.7954776484895188e-07, "loss": 0.0353, "step": 10077 }, { "epoch": 2.75, "grad_norm": 1.5924006685294518, "learning_rate": 1.7915660276641045e-07, "loss": 0.0452, "step": 10078 }, { "epoch": 2.75, "grad_norm": 1.5119785026649646, "learning_rate": 1.7876585946763892e-07, "loss": 0.045, "step": 10079 }, { "epoch": 2.75, "grad_norm": 1.5187261686703846, "learning_rate": 1.7837553498657955e-07, "loss": 0.0511, "step": 10080 }, { "epoch": 2.75, "grad_norm": 1.413272087148863, "learning_rate": 1.7798562935714082e-07, "loss": 0.0372, "step": 10081 }, { "epoch": 2.75, "grad_norm": 1.5096797310966612, "learning_rate": 1.7759614261319337e-07, "loss": 0.0461, "step": 10082 }, { "epoch": 2.75, "grad_norm": 1.5166389965927052, "learning_rate": 1.772070747885718e-07, "loss": 0.044, "step": 10083 }, { "epoch": 2.75, "grad_norm": 1.4355471934432087, "learning_rate": 1.7681842591707465e-07, "loss": 0.0378, "step": 10084 }, { "epoch": 2.75, "grad_norm": 1.5176718536389435, "learning_rate": 1.764301960324627e-07, "loss": 0.0414, "step": 10085 }, { "epoch": 2.75, "grad_norm": 1.5046547218333923, "learning_rate": 1.7604238516846062e-07, "loss": 0.0437, "step": 10086 }, { "epoch": 2.75, "grad_norm": 1.5010675303381087, "learning_rate": 1.7565499335875924e-07, "loss": 0.0392, "step": 10087 }, { "epoch": 2.75, "grad_norm": 1.5329800147829753, "learning_rate": 1.7526802063700943e-07, "loss": 0.0461, "step": 10088 }, { "epoch": 2.75, "grad_norm": 1.323494455746387, "learning_rate": 1.748814670368282e-07, "loss": 0.0348, "step": 10089 }, { "epoch": 2.75, "grad_norm": 1.4984739245776257, "learning_rate": 1.744953325917953e-07, "loss": 0.0426, "step": 10090 }, { "epoch": 2.75, "grad_norm": 1.4033968647536448, "learning_rate": 1.741096173354534e-07, "loss": 0.0396, "step": 10091 }, { "epoch": 2.76, "grad_norm": 1.3598023944094868, "learning_rate": 1.7372432130130955e-07, "loss": 0.0401, "step": 10092 }, { "epoch": 2.76, "grad_norm": 1.5191944219745634, "learning_rate": 1.7333944452283425e-07, "loss": 0.0458, "step": 10093 }, { "epoch": 2.76, "grad_norm": 1.6974850785826827, "learning_rate": 1.729549870334607e-07, "loss": 0.0514, "step": 10094 }, { "epoch": 2.76, "grad_norm": 2.082311200400368, "learning_rate": 1.725709488665883e-07, "loss": 0.0465, "step": 10095 }, { "epoch": 2.76, "grad_norm": 1.5926724684163749, "learning_rate": 1.7218733005557707e-07, "loss": 0.0494, "step": 10096 }, { "epoch": 2.76, "grad_norm": 1.432621228268689, "learning_rate": 1.71804130633752e-07, "loss": 0.0427, "step": 10097 }, { "epoch": 2.76, "grad_norm": 1.3469829644280789, "learning_rate": 1.7142135063440034e-07, "loss": 0.0433, "step": 10098 }, { "epoch": 2.76, "grad_norm": 1.5759139631588548, "learning_rate": 1.7103899009077606e-07, "loss": 0.0486, "step": 10099 }, { "epoch": 2.76, "grad_norm": 1.5959131723318278, "learning_rate": 1.7065704903609259e-07, "loss": 0.0461, "step": 10100 }, { "epoch": 2.76, "grad_norm": 1.313403489637816, "learning_rate": 1.7027552750353005e-07, "loss": 0.0355, "step": 10101 }, { "epoch": 2.76, "grad_norm": 1.4811476128264136, "learning_rate": 1.6989442552623082e-07, "loss": 0.0499, "step": 10102 }, { "epoch": 2.76, "grad_norm": 1.3768012253187047, "learning_rate": 1.695137431373006e-07, "loss": 0.039, "step": 10103 }, { "epoch": 2.76, "grad_norm": 1.4296769551684576, "learning_rate": 1.6913348036980914e-07, "loss": 0.0447, "step": 10104 }, { "epoch": 2.76, "grad_norm": 1.555174933504298, "learning_rate": 1.6875363725679052e-07, "loss": 0.0499, "step": 10105 }, { "epoch": 2.76, "grad_norm": 1.6434851184884691, "learning_rate": 1.683742138312394e-07, "loss": 0.0439, "step": 10106 }, { "epoch": 2.76, "grad_norm": 1.5512538620189686, "learning_rate": 1.6799521012611843e-07, "loss": 0.0452, "step": 10107 }, { "epoch": 2.76, "grad_norm": 1.4046692988104377, "learning_rate": 1.676166261743506e-07, "loss": 0.0425, "step": 10108 }, { "epoch": 2.76, "grad_norm": 1.6908210894622395, "learning_rate": 1.67238462008823e-07, "loss": 0.0542, "step": 10109 }, { "epoch": 2.76, "grad_norm": 1.6627767438976886, "learning_rate": 1.66860717662386e-07, "loss": 0.0431, "step": 10110 }, { "epoch": 2.76, "grad_norm": 1.7289206635384784, "learning_rate": 1.6648339316785556e-07, "loss": 0.0427, "step": 10111 }, { "epoch": 2.76, "grad_norm": 1.4980696733457581, "learning_rate": 1.6610648855800772e-07, "loss": 0.0415, "step": 10112 }, { "epoch": 2.76, "grad_norm": 1.5352621398930482, "learning_rate": 1.657300038655857e-07, "loss": 0.043, "step": 10113 }, { "epoch": 2.76, "grad_norm": 1.3654952467606503, "learning_rate": 1.6535393912329388e-07, "loss": 0.0388, "step": 10114 }, { "epoch": 2.76, "grad_norm": 1.2492204846769608, "learning_rate": 1.6497829436380009e-07, "loss": 0.0384, "step": 10115 }, { "epoch": 2.76, "grad_norm": 1.5246298453278262, "learning_rate": 1.6460306961973705e-07, "loss": 0.0444, "step": 10116 }, { "epoch": 2.76, "grad_norm": 1.5581795102414493, "learning_rate": 1.6422826492370037e-07, "loss": 0.05, "step": 10117 }, { "epoch": 2.76, "grad_norm": 1.3456830726366065, "learning_rate": 1.6385388030824844e-07, "loss": 0.0341, "step": 10118 }, { "epoch": 2.76, "grad_norm": 1.3718080844963219, "learning_rate": 1.6347991580590472e-07, "loss": 0.0508, "step": 10119 }, { "epoch": 2.76, "grad_norm": 1.6513661442086138, "learning_rate": 1.6310637144915542e-07, "loss": 0.0527, "step": 10120 }, { "epoch": 2.76, "grad_norm": 1.316832159190448, "learning_rate": 1.6273324727044905e-07, "loss": 0.0394, "step": 10121 }, { "epoch": 2.76, "grad_norm": 1.2826645293382917, "learning_rate": 1.6236054330219853e-07, "loss": 0.0348, "step": 10122 }, { "epoch": 2.76, "grad_norm": 1.3530485335628175, "learning_rate": 1.619882595767819e-07, "loss": 0.043, "step": 10123 }, { "epoch": 2.76, "grad_norm": 1.58148363927937, "learning_rate": 1.6161639612653824e-07, "loss": 0.0485, "step": 10124 }, { "epoch": 2.76, "grad_norm": 1.3947319197264723, "learning_rate": 1.612449529837712e-07, "loss": 0.0365, "step": 10125 }, { "epoch": 2.76, "grad_norm": 1.7408314190435925, "learning_rate": 1.6087393018074825e-07, "loss": 0.0492, "step": 10126 }, { "epoch": 2.76, "grad_norm": 1.5687440109595219, "learning_rate": 1.605033277496998e-07, "loss": 0.0471, "step": 10127 }, { "epoch": 2.76, "grad_norm": 1.352200757766226, "learning_rate": 1.601331457228189e-07, "loss": 0.0412, "step": 10128 }, { "epoch": 2.77, "grad_norm": 1.4702556117474788, "learning_rate": 1.597633841322638e-07, "loss": 0.0409, "step": 10129 }, { "epoch": 2.77, "grad_norm": 1.697997663766319, "learning_rate": 1.5939404301015537e-07, "loss": 0.0403, "step": 10130 }, { "epoch": 2.77, "grad_norm": 1.5204800355367796, "learning_rate": 1.590251223885786e-07, "loss": 0.0486, "step": 10131 }, { "epoch": 2.77, "grad_norm": 1.4234644478681038, "learning_rate": 1.5865662229958112e-07, "loss": 0.0428, "step": 10132 }, { "epoch": 2.77, "grad_norm": 1.4673909107302767, "learning_rate": 1.5828854277517404e-07, "loss": 0.0397, "step": 10133 }, { "epoch": 2.77, "grad_norm": 1.6504606247051161, "learning_rate": 1.5792088384733174e-07, "loss": 0.0417, "step": 10134 }, { "epoch": 2.77, "grad_norm": 1.4207983775597695, "learning_rate": 1.5755364554799367e-07, "loss": 0.0353, "step": 10135 }, { "epoch": 2.77, "grad_norm": 1.4866392758209426, "learning_rate": 1.5718682790906048e-07, "loss": 0.0448, "step": 10136 }, { "epoch": 2.77, "grad_norm": 1.3952214728060652, "learning_rate": 1.568204309623983e-07, "loss": 0.0414, "step": 10137 }, { "epoch": 2.77, "grad_norm": 1.4779424049223246, "learning_rate": 1.5645445473983557e-07, "loss": 0.0468, "step": 10138 }, { "epoch": 2.77, "grad_norm": 1.4092837774680038, "learning_rate": 1.5608889927316407e-07, "loss": 0.04, "step": 10139 }, { "epoch": 2.77, "grad_norm": 1.6254130604515964, "learning_rate": 1.5572376459413897e-07, "loss": 0.045, "step": 10140 }, { "epoch": 2.77, "grad_norm": 1.3558297525643035, "learning_rate": 1.55359050734481e-07, "loss": 0.0402, "step": 10141 }, { "epoch": 2.77, "grad_norm": 1.384797794915866, "learning_rate": 1.549947577258709e-07, "loss": 0.0393, "step": 10142 }, { "epoch": 2.77, "grad_norm": 1.3470437893134084, "learning_rate": 1.5463088559995564e-07, "loss": 0.0356, "step": 10143 }, { "epoch": 2.77, "grad_norm": 1.3570426854956028, "learning_rate": 1.5426743438834436e-07, "loss": 0.0437, "step": 10144 }, { "epoch": 2.77, "grad_norm": 1.516786162655479, "learning_rate": 1.5390440412260954e-07, "loss": 0.0415, "step": 10145 }, { "epoch": 2.77, "grad_norm": 1.2551763850367763, "learning_rate": 1.535417948342871e-07, "loss": 0.0312, "step": 10146 }, { "epoch": 2.77, "grad_norm": 1.365626797717536, "learning_rate": 1.531796065548774e-07, "loss": 0.0444, "step": 10147 }, { "epoch": 2.77, "grad_norm": 1.3184970338463724, "learning_rate": 1.5281783931584303e-07, "loss": 0.0416, "step": 10148 }, { "epoch": 2.77, "grad_norm": 1.3837442956832846, "learning_rate": 1.524564931486111e-07, "loss": 0.0384, "step": 10149 }, { "epoch": 2.77, "grad_norm": 1.418513100036654, "learning_rate": 1.5209556808457093e-07, "loss": 0.0338, "step": 10150 }, { "epoch": 2.77, "grad_norm": 1.5049714897159012, "learning_rate": 1.5173506415507632e-07, "loss": 0.0465, "step": 10151 }, { "epoch": 2.77, "grad_norm": 1.413141116128674, "learning_rate": 1.5137498139144336e-07, "loss": 0.0406, "step": 10152 }, { "epoch": 2.77, "grad_norm": 1.6949414870341155, "learning_rate": 1.510153198249531e-07, "loss": 0.0443, "step": 10153 }, { "epoch": 2.77, "grad_norm": 1.356071886152123, "learning_rate": 1.506560794868478e-07, "loss": 0.0423, "step": 10154 }, { "epoch": 2.77, "grad_norm": 1.2555536510251804, "learning_rate": 1.5029726040833638e-07, "loss": 0.0403, "step": 10155 }, { "epoch": 2.77, "grad_norm": 1.4800940028116727, "learning_rate": 1.4993886262058833e-07, "loss": 0.0476, "step": 10156 }, { "epoch": 2.77, "grad_norm": 1.4765918271488911, "learning_rate": 1.4958088615473598e-07, "loss": 0.0463, "step": 10157 }, { "epoch": 2.77, "grad_norm": 1.4769284088971608, "learning_rate": 1.4922333104187892e-07, "loss": 0.052, "step": 10158 }, { "epoch": 2.77, "grad_norm": 1.4769669964561707, "learning_rate": 1.4886619731307617e-07, "loss": 0.0463, "step": 10159 }, { "epoch": 2.77, "grad_norm": 1.656253393306451, "learning_rate": 1.485094849993529e-07, "loss": 0.0507, "step": 10160 }, { "epoch": 2.77, "grad_norm": 1.4442834023060762, "learning_rate": 1.481531941316955e-07, "loss": 0.0403, "step": 10161 }, { "epoch": 2.77, "grad_norm": 1.3963816038453938, "learning_rate": 1.4779732474105525e-07, "loss": 0.0456, "step": 10162 }, { "epoch": 2.77, "grad_norm": 1.5250482284270535, "learning_rate": 1.4744187685834576e-07, "loss": 0.0418, "step": 10163 }, { "epoch": 2.77, "grad_norm": 1.2213055780938222, "learning_rate": 1.4708685051444515e-07, "loss": 0.033, "step": 10164 }, { "epoch": 2.78, "grad_norm": 1.4716080702731333, "learning_rate": 1.4673224574019373e-07, "loss": 0.0425, "step": 10165 }, { "epoch": 2.78, "grad_norm": 1.3351171757890703, "learning_rate": 1.4637806256639685e-07, "loss": 0.0334, "step": 10166 }, { "epoch": 2.78, "grad_norm": 1.753367743117955, "learning_rate": 1.460243010238216e-07, "loss": 0.0418, "step": 10167 }, { "epoch": 2.78, "grad_norm": 1.7239401447375389, "learning_rate": 1.4567096114319833e-07, "loss": 0.0544, "step": 10168 }, { "epoch": 2.78, "grad_norm": 1.359980583791488, "learning_rate": 1.4531804295522256e-07, "loss": 0.0385, "step": 10169 }, { "epoch": 2.78, "grad_norm": 1.760715249627928, "learning_rate": 1.449655464905514e-07, "loss": 0.0524, "step": 10170 }, { "epoch": 2.78, "grad_norm": 1.5336253529150983, "learning_rate": 1.4461347177980644e-07, "loss": 0.0441, "step": 10171 }, { "epoch": 2.78, "grad_norm": 1.2444795763847114, "learning_rate": 1.4426181885357215e-07, "loss": 0.0367, "step": 10172 }, { "epoch": 2.78, "grad_norm": 1.3773821883987891, "learning_rate": 1.439105877423963e-07, "loss": 0.0452, "step": 10173 }, { "epoch": 2.78, "grad_norm": 1.6067729145648395, "learning_rate": 1.4355977847679004e-07, "loss": 0.0405, "step": 10174 }, { "epoch": 2.78, "grad_norm": 1.557517681061879, "learning_rate": 1.432093910872273e-07, "loss": 0.0437, "step": 10175 }, { "epoch": 2.78, "grad_norm": 1.6638289089761185, "learning_rate": 1.4285942560414768e-07, "loss": 0.0412, "step": 10176 }, { "epoch": 2.78, "grad_norm": 1.7623261820886194, "learning_rate": 1.4250988205795068e-07, "loss": 0.0444, "step": 10177 }, { "epoch": 2.78, "grad_norm": 1.6768483942338641, "learning_rate": 1.421607604790026e-07, "loss": 0.0506, "step": 10178 }, { "epoch": 2.78, "grad_norm": 1.6491985570417915, "learning_rate": 1.4181206089763033e-07, "loss": 0.0487, "step": 10179 }, { "epoch": 2.78, "grad_norm": 1.3607398917969995, "learning_rate": 1.414637833441257e-07, "loss": 0.0414, "step": 10180 }, { "epoch": 2.78, "grad_norm": 1.2958749745842397, "learning_rate": 1.4111592784874285e-07, "loss": 0.0335, "step": 10181 }, { "epoch": 2.78, "grad_norm": 1.3815602599142944, "learning_rate": 1.4076849444170036e-07, "loss": 0.0403, "step": 10182 }, { "epoch": 2.78, "grad_norm": 1.1496738231043915, "learning_rate": 1.4042148315317862e-07, "loss": 0.0365, "step": 10183 }, { "epoch": 2.78, "grad_norm": 1.392121912593456, "learning_rate": 1.400748940133234e-07, "loss": 0.0442, "step": 10184 }, { "epoch": 2.78, "grad_norm": 1.3662152606491669, "learning_rate": 1.3972872705224238e-07, "loss": 0.0346, "step": 10185 }, { "epoch": 2.78, "grad_norm": 1.5519988603068833, "learning_rate": 1.3938298230000646e-07, "loss": 0.0486, "step": 10186 }, { "epoch": 2.78, "grad_norm": 1.4999385781894288, "learning_rate": 1.3903765978665052e-07, "loss": 0.0451, "step": 10187 }, { "epoch": 2.78, "grad_norm": 4.808520194515675, "learning_rate": 1.3869275954217275e-07, "loss": 0.042, "step": 10188 }, { "epoch": 2.78, "grad_norm": 1.6806842765022694, "learning_rate": 1.3834828159653368e-07, "loss": 0.0466, "step": 10189 }, { "epoch": 2.78, "grad_norm": 1.5624591276295847, "learning_rate": 1.3800422597965935e-07, "loss": 0.0474, "step": 10190 }, { "epoch": 2.78, "grad_norm": 1.4401612323497468, "learning_rate": 1.376605927214364e-07, "loss": 0.0387, "step": 10191 }, { "epoch": 2.78, "grad_norm": 1.3970698920584177, "learning_rate": 1.373173818517165e-07, "loss": 0.0448, "step": 10192 }, { "epoch": 2.78, "grad_norm": 1.6205673605008553, "learning_rate": 1.369745934003136e-07, "loss": 0.049, "step": 10193 }, { "epoch": 2.78, "grad_norm": 1.4896466474769081, "learning_rate": 1.3663222739700665e-07, "loss": 0.0448, "step": 10194 }, { "epoch": 2.78, "grad_norm": 1.5594991319235247, "learning_rate": 1.362902838715352e-07, "loss": 0.0448, "step": 10195 }, { "epoch": 2.78, "grad_norm": 1.4203446493838834, "learning_rate": 1.3594876285360548e-07, "loss": 0.0403, "step": 10196 }, { "epoch": 2.78, "grad_norm": 1.3779027645202082, "learning_rate": 1.3560766437288432e-07, "loss": 0.0391, "step": 10197 }, { "epoch": 2.78, "grad_norm": 1.594925848746612, "learning_rate": 1.3526698845900244e-07, "loss": 0.0513, "step": 10198 }, { "epoch": 2.78, "grad_norm": 1.384964692684855, "learning_rate": 1.3492673514155452e-07, "loss": 0.043, "step": 10199 }, { "epoch": 2.78, "grad_norm": 1.6021753219030994, "learning_rate": 1.3458690445009804e-07, "loss": 0.0444, "step": 10200 }, { "epoch": 2.78, "grad_norm": 1.3658660049668352, "learning_rate": 1.342474964141538e-07, "loss": 0.0414, "step": 10201 }, { "epoch": 2.79, "grad_norm": 1.5958701857333515, "learning_rate": 1.3390851106320656e-07, "loss": 0.0477, "step": 10202 }, { "epoch": 2.79, "grad_norm": 1.645845823444285, "learning_rate": 1.3356994842670335e-07, "loss": 0.053, "step": 10203 }, { "epoch": 2.79, "grad_norm": 1.5876968401139924, "learning_rate": 1.3323180853405504e-07, "loss": 0.0499, "step": 10204 }, { "epoch": 2.79, "grad_norm": 1.3767623368089479, "learning_rate": 1.328940914146354e-07, "loss": 0.0427, "step": 10205 }, { "epoch": 2.79, "grad_norm": 1.324832777356151, "learning_rate": 1.3255679709778148e-07, "loss": 0.034, "step": 10206 }, { "epoch": 2.79, "grad_norm": 1.6321385577055632, "learning_rate": 1.322199256127943e-07, "loss": 0.0483, "step": 10207 }, { "epoch": 2.79, "grad_norm": 1.4608467447509321, "learning_rate": 1.3188347698893767e-07, "loss": 0.0459, "step": 10208 }, { "epoch": 2.79, "grad_norm": 1.1990162029341689, "learning_rate": 1.3154745125543877e-07, "loss": 0.0386, "step": 10209 }, { "epoch": 2.79, "grad_norm": 1.6463833026468588, "learning_rate": 1.312118484414876e-07, "loss": 0.0451, "step": 10210 }, { "epoch": 2.79, "grad_norm": 1.4055817944023774, "learning_rate": 1.308766685762375e-07, "loss": 0.0387, "step": 10211 }, { "epoch": 2.79, "grad_norm": 1.3649988242859281, "learning_rate": 1.3054191168880682e-07, "loss": 0.0345, "step": 10212 }, { "epoch": 2.79, "grad_norm": 1.3899041587005065, "learning_rate": 1.3020757780827343e-07, "loss": 0.0434, "step": 10213 }, { "epoch": 2.79, "grad_norm": 1.2815803586900372, "learning_rate": 1.2987366696368243e-07, "loss": 0.0357, "step": 10214 }, { "epoch": 2.79, "grad_norm": 1.734277136420921, "learning_rate": 1.2954017918404006e-07, "loss": 0.0597, "step": 10215 }, { "epoch": 2.79, "grad_norm": 1.4887782261271665, "learning_rate": 1.2920711449831646e-07, "loss": 0.0493, "step": 10216 }, { "epoch": 2.79, "grad_norm": 1.472048747818957, "learning_rate": 1.2887447293544353e-07, "loss": 0.0415, "step": 10217 }, { "epoch": 2.79, "grad_norm": 1.6798398333796083, "learning_rate": 1.2854225452431923e-07, "loss": 0.0447, "step": 10218 }, { "epoch": 2.79, "grad_norm": 1.3257456437823179, "learning_rate": 1.2821045929380162e-07, "loss": 0.0443, "step": 10219 }, { "epoch": 2.79, "grad_norm": 1.8266360371144905, "learning_rate": 1.2787908727271536e-07, "loss": 0.045, "step": 10220 }, { "epoch": 2.79, "grad_norm": 1.4740762837088641, "learning_rate": 1.2754813848984526e-07, "loss": 0.05, "step": 10221 }, { "epoch": 2.79, "grad_norm": 1.6029878452905846, "learning_rate": 1.2721761297394108e-07, "loss": 0.0467, "step": 10222 }, { "epoch": 2.79, "grad_norm": 1.4101053076620393, "learning_rate": 1.268875107537143e-07, "loss": 0.043, "step": 10223 }, { "epoch": 2.79, "grad_norm": 1.3741626795655153, "learning_rate": 1.2655783185784253e-07, "loss": 0.0376, "step": 10224 }, { "epoch": 2.79, "grad_norm": 1.3306873939405592, "learning_rate": 1.2622857631496344e-07, "loss": 0.0465, "step": 10225 }, { "epoch": 2.79, "grad_norm": 1.4822898465031749, "learning_rate": 1.2589974415367968e-07, "loss": 0.0466, "step": 10226 }, { "epoch": 2.79, "grad_norm": 1.5327377675320248, "learning_rate": 1.2557133540255728e-07, "loss": 0.0452, "step": 10227 }, { "epoch": 2.79, "grad_norm": 1.379059637986405, "learning_rate": 1.25243350090124e-07, "loss": 0.0428, "step": 10228 }, { "epoch": 2.79, "grad_norm": 1.6152608978483405, "learning_rate": 1.2491578824487204e-07, "loss": 0.0459, "step": 10229 }, { "epoch": 2.79, "grad_norm": 1.502272124704391, "learning_rate": 1.24588649895257e-07, "loss": 0.0405, "step": 10230 }, { "epoch": 2.79, "grad_norm": 1.6826585532978697, "learning_rate": 1.2426193506969607e-07, "loss": 0.0471, "step": 10231 }, { "epoch": 2.79, "grad_norm": 1.3696286621718514, "learning_rate": 1.2393564379657163e-07, "loss": 0.0366, "step": 10232 }, { "epoch": 2.79, "grad_norm": 1.601824766206709, "learning_rate": 1.2360977610422874e-07, "loss": 0.0425, "step": 10233 }, { "epoch": 2.79, "grad_norm": 1.1938427245129186, "learning_rate": 1.2328433202097422e-07, "loss": 0.0322, "step": 10234 }, { "epoch": 2.79, "grad_norm": 1.6093412248909347, "learning_rate": 1.229593115750799e-07, "loss": 0.0485, "step": 10235 }, { "epoch": 2.79, "grad_norm": 1.5378740047703965, "learning_rate": 1.2263471479477984e-07, "loss": 0.0426, "step": 10236 }, { "epoch": 2.79, "grad_norm": 1.5700360144658767, "learning_rate": 1.2231054170827205e-07, "loss": 0.0444, "step": 10237 }, { "epoch": 2.79, "grad_norm": 1.352477567417536, "learning_rate": 1.219867923437168e-07, "loss": 0.0376, "step": 10238 }, { "epoch": 2.8, "grad_norm": 1.261433457523689, "learning_rate": 1.2166346672923824e-07, "loss": 0.0407, "step": 10239 }, { "epoch": 2.8, "grad_norm": 1.5882594161659969, "learning_rate": 1.2134056489292335e-07, "loss": 0.051, "step": 10240 }, { "epoch": 2.8, "grad_norm": 3.0624647775817864, "learning_rate": 1.210180868628219e-07, "loss": 0.0448, "step": 10241 }, { "epoch": 2.8, "grad_norm": 1.4300234273668058, "learning_rate": 1.2069603266694873e-07, "loss": 0.0417, "step": 10242 }, { "epoch": 2.8, "grad_norm": 1.524139481692172, "learning_rate": 1.2037440233327868e-07, "loss": 0.0386, "step": 10243 }, { "epoch": 2.8, "grad_norm": 1.6800072034563645, "learning_rate": 1.2005319588975328e-07, "loss": 0.0466, "step": 10244 }, { "epoch": 2.8, "grad_norm": 1.572036328377728, "learning_rate": 1.1973241336427522e-07, "loss": 0.0422, "step": 10245 }, { "epoch": 2.8, "grad_norm": 2.039479993750843, "learning_rate": 1.1941205478470995e-07, "loss": 0.0488, "step": 10246 }, { "epoch": 2.8, "grad_norm": 1.3253889628535431, "learning_rate": 1.1909212017888639e-07, "loss": 0.0388, "step": 10247 }, { "epoch": 2.8, "grad_norm": 1.370221469506656, "learning_rate": 1.1877260957459835e-07, "loss": 0.042, "step": 10248 }, { "epoch": 2.8, "grad_norm": 1.5202802116543876, "learning_rate": 1.1845352299960089e-07, "loss": 0.0495, "step": 10249 }, { "epoch": 2.8, "grad_norm": 1.4972730505640341, "learning_rate": 1.1813486048161348e-07, "loss": 0.0452, "step": 10250 }, { "epoch": 2.8, "grad_norm": 1.8616685997059415, "learning_rate": 1.1781662204831735e-07, "loss": 0.0509, "step": 10251 }, { "epoch": 2.8, "grad_norm": 1.6096214226019423, "learning_rate": 1.1749880772735811e-07, "loss": 0.0405, "step": 10252 }, { "epoch": 2.8, "grad_norm": 1.5009625917660852, "learning_rate": 1.1718141754634371e-07, "loss": 0.0449, "step": 10253 }, { "epoch": 2.8, "grad_norm": 1.4716946698278233, "learning_rate": 1.1686445153284598e-07, "loss": 0.0412, "step": 10254 }, { "epoch": 2.8, "grad_norm": 1.4623878498518141, "learning_rate": 1.1654790971439956e-07, "loss": 0.0475, "step": 10255 }, { "epoch": 2.8, "grad_norm": 1.5477689084291164, "learning_rate": 1.1623179211850244e-07, "loss": 0.049, "step": 10256 }, { "epoch": 2.8, "grad_norm": 1.4660251036140532, "learning_rate": 1.1591609877261545e-07, "loss": 0.0439, "step": 10257 }, { "epoch": 2.8, "grad_norm": 1.2390179348252883, "learning_rate": 1.1560082970416164e-07, "loss": 0.038, "step": 10258 }, { "epoch": 2.8, "grad_norm": 1.6294912710546787, "learning_rate": 1.1528598494052967e-07, "loss": 0.0395, "step": 10259 }, { "epoch": 2.8, "grad_norm": 1.8053669469188616, "learning_rate": 1.1497156450906933e-07, "loss": 0.0507, "step": 10260 }, { "epoch": 2.8, "grad_norm": 1.4246623450905862, "learning_rate": 1.1465756843709431e-07, "loss": 0.0406, "step": 10261 }, { "epoch": 2.8, "grad_norm": 1.4200724216566583, "learning_rate": 1.1434399675188112e-07, "loss": 0.0483, "step": 10262 }, { "epoch": 2.8, "grad_norm": 1.4818500816572715, "learning_rate": 1.1403084948067023e-07, "loss": 0.046, "step": 10263 }, { "epoch": 2.8, "grad_norm": 1.6169302805486885, "learning_rate": 1.1371812665066262e-07, "loss": 0.0407, "step": 10264 }, { "epoch": 2.8, "grad_norm": 1.3403597403887053, "learning_rate": 1.1340582828902658e-07, "loss": 0.0436, "step": 10265 }, { "epoch": 2.8, "grad_norm": 1.4519373707005698, "learning_rate": 1.1309395442288928e-07, "loss": 0.0421, "step": 10266 }, { "epoch": 2.8, "grad_norm": 1.3980574560668704, "learning_rate": 1.1278250507934518e-07, "loss": 0.0442, "step": 10267 }, { "epoch": 2.8, "grad_norm": 1.4310280768496546, "learning_rate": 1.1247148028544819e-07, "loss": 0.041, "step": 10268 }, { "epoch": 2.8, "grad_norm": 1.60816964126377, "learning_rate": 1.1216088006821724e-07, "loss": 0.0447, "step": 10269 }, { "epoch": 2.8, "grad_norm": 1.9797915469700673, "learning_rate": 1.1185070445463352e-07, "loss": 0.0551, "step": 10270 }, { "epoch": 2.8, "grad_norm": 1.5612072625321713, "learning_rate": 1.1154095347164274e-07, "loss": 0.0521, "step": 10271 }, { "epoch": 2.8, "grad_norm": 1.5114783108727903, "learning_rate": 1.1123162714615221e-07, "loss": 0.04, "step": 10272 }, { "epoch": 2.8, "grad_norm": 1.3942306456436464, "learning_rate": 1.1092272550503269e-07, "loss": 0.0404, "step": 10273 }, { "epoch": 2.8, "grad_norm": 1.5997001004650417, "learning_rate": 1.1061424857511937e-07, "loss": 0.0527, "step": 10274 }, { "epoch": 2.81, "grad_norm": 1.6502340585635868, "learning_rate": 1.1030619638320805e-07, "loss": 0.0446, "step": 10275 }, { "epoch": 2.81, "grad_norm": 1.4094479406994191, "learning_rate": 1.0999856895605953e-07, "loss": 0.045, "step": 10276 }, { "epoch": 2.81, "grad_norm": 1.5145313916333254, "learning_rate": 1.0969136632039746e-07, "loss": 0.0449, "step": 10277 }, { "epoch": 2.81, "grad_norm": 1.4800230600559876, "learning_rate": 1.0938458850290823e-07, "loss": 0.0461, "step": 10278 }, { "epoch": 2.81, "grad_norm": 1.509031164988642, "learning_rate": 1.0907823553024166e-07, "loss": 0.0462, "step": 10279 }, { "epoch": 2.81, "grad_norm": 1.8100307906192197, "learning_rate": 1.0877230742901035e-07, "loss": 0.0478, "step": 10280 }, { "epoch": 2.81, "grad_norm": 1.6272359398029264, "learning_rate": 1.084668042257897e-07, "loss": 0.0391, "step": 10281 }, { "epoch": 2.81, "grad_norm": 1.6515151883720558, "learning_rate": 1.0816172594711904e-07, "loss": 0.0553, "step": 10282 }, { "epoch": 2.81, "grad_norm": 1.5686126414868269, "learning_rate": 1.0785707261949996e-07, "loss": 0.0418, "step": 10283 }, { "epoch": 2.81, "grad_norm": 1.327101290021838, "learning_rate": 1.0755284426939794e-07, "loss": 0.0375, "step": 10284 }, { "epoch": 2.81, "grad_norm": 1.3216015845474436, "learning_rate": 1.0724904092324074e-07, "loss": 0.0427, "step": 10285 }, { "epoch": 2.81, "grad_norm": 1.4432631845120643, "learning_rate": 1.0694566260742001e-07, "loss": 0.0379, "step": 10286 }, { "epoch": 2.81, "grad_norm": 1.5885397278330393, "learning_rate": 1.0664270934828969e-07, "loss": 0.0476, "step": 10287 }, { "epoch": 2.81, "grad_norm": 1.5102568216053067, "learning_rate": 1.0634018117216705e-07, "loss": 0.0482, "step": 10288 }, { "epoch": 2.81, "grad_norm": 1.5269141424697574, "learning_rate": 1.0603807810533273e-07, "loss": 0.0421, "step": 10289 }, { "epoch": 2.81, "grad_norm": 1.6185465992239596, "learning_rate": 1.0573640017402964e-07, "loss": 0.0481, "step": 10290 }, { "epoch": 2.81, "grad_norm": 1.5601938207928556, "learning_rate": 1.054351474044657e-07, "loss": 0.0448, "step": 10291 }, { "epoch": 2.81, "grad_norm": 1.4916135845255432, "learning_rate": 1.0513431982280997e-07, "loss": 0.0476, "step": 10292 }, { "epoch": 2.81, "grad_norm": 1.5692871454727004, "learning_rate": 1.0483391745519488e-07, "loss": 0.0545, "step": 10293 }, { "epoch": 2.81, "grad_norm": 1.3951246053325876, "learning_rate": 1.0453394032771569e-07, "loss": 0.0455, "step": 10294 }, { "epoch": 2.81, "grad_norm": 1.321654252823857, "learning_rate": 1.0423438846643264e-07, "loss": 0.0382, "step": 10295 }, { "epoch": 2.81, "grad_norm": 1.285175199321555, "learning_rate": 1.0393526189736602e-07, "loss": 0.0378, "step": 10296 }, { "epoch": 2.81, "grad_norm": 1.4703895036001293, "learning_rate": 1.0363656064650174e-07, "loss": 0.0333, "step": 10297 }, { "epoch": 2.81, "grad_norm": 1.319733881728103, "learning_rate": 1.0333828473978846e-07, "loss": 0.0356, "step": 10298 }, { "epoch": 2.81, "grad_norm": 0.9693758319323303, "learning_rate": 1.0304043420313602e-07, "loss": 0.0282, "step": 10299 }, { "epoch": 2.81, "grad_norm": 1.3891653156750152, "learning_rate": 1.0274300906241819e-07, "loss": 0.0445, "step": 10300 }, { "epoch": 2.81, "grad_norm": 1.6528772992668859, "learning_rate": 1.0244600934347371e-07, "loss": 0.0494, "step": 10301 }, { "epoch": 2.81, "grad_norm": 1.4941120781891513, "learning_rate": 1.0214943507210085e-07, "loss": 0.0458, "step": 10302 }, { "epoch": 2.81, "grad_norm": 1.3169203646237786, "learning_rate": 1.018532862740651e-07, "loss": 0.0435, "step": 10303 }, { "epoch": 2.81, "grad_norm": 1.412247627194948, "learning_rate": 1.015575629750909e-07, "loss": 0.0401, "step": 10304 }, { "epoch": 2.81, "grad_norm": 1.4512937381357316, "learning_rate": 1.0126226520086823e-07, "loss": 0.0429, "step": 10305 }, { "epoch": 2.81, "grad_norm": 1.3243465959762477, "learning_rate": 1.0096739297704938e-07, "loss": 0.0405, "step": 10306 }, { "epoch": 2.81, "grad_norm": 1.7011885514836016, "learning_rate": 1.006729463292494e-07, "loss": 0.0516, "step": 10307 }, { "epoch": 2.81, "grad_norm": 1.5913459298847301, "learning_rate": 1.0037892528304726e-07, "loss": 0.048, "step": 10308 }, { "epoch": 2.81, "grad_norm": 1.4834376671130916, "learning_rate": 1.0008532986398422e-07, "loss": 0.0497, "step": 10309 }, { "epoch": 2.81, "grad_norm": 1.6195733931655385, "learning_rate": 9.979216009756488e-08, "loss": 0.0411, "step": 10310 }, { "epoch": 2.81, "grad_norm": 1.474512514504948, "learning_rate": 9.949941600925606e-08, "loss": 0.0438, "step": 10311 }, { "epoch": 2.82, "grad_norm": 1.3162705760507007, "learning_rate": 9.920709762448854e-08, "loss": 0.038, "step": 10312 }, { "epoch": 2.82, "grad_norm": 1.303891864528856, "learning_rate": 9.891520496865647e-08, "loss": 0.0394, "step": 10313 }, { "epoch": 2.82, "grad_norm": 1.6789717579052865, "learning_rate": 9.862373806711567e-08, "loss": 0.0444, "step": 10314 }, { "epoch": 2.82, "grad_norm": 1.5782821863584564, "learning_rate": 9.833269694518587e-08, "loss": 0.0478, "step": 10315 }, { "epoch": 2.82, "grad_norm": 1.4184351865591265, "learning_rate": 9.804208162815021e-08, "loss": 0.0439, "step": 10316 }, { "epoch": 2.82, "grad_norm": 1.514993088182654, "learning_rate": 9.775189214125347e-08, "loss": 0.0432, "step": 10317 }, { "epoch": 2.82, "grad_norm": 1.4028780395722584, "learning_rate": 9.746212850970383e-08, "loss": 0.042, "step": 10318 }, { "epoch": 2.82, "grad_norm": 1.3638358319960358, "learning_rate": 9.717279075867448e-08, "loss": 0.0388, "step": 10319 }, { "epoch": 2.82, "grad_norm": 1.255442066749372, "learning_rate": 9.688387891329864e-08, "loss": 0.038, "step": 10320 }, { "epoch": 2.82, "grad_norm": 1.7533958667775111, "learning_rate": 9.65953929986746e-08, "loss": 0.0514, "step": 10321 }, { "epoch": 2.82, "grad_norm": 1.8962955796887333, "learning_rate": 9.630733303986283e-08, "loss": 0.0527, "step": 10322 }, { "epoch": 2.82, "grad_norm": 1.3924174917053918, "learning_rate": 9.601969906188723e-08, "loss": 0.0437, "step": 10323 }, { "epoch": 2.82, "grad_norm": 1.575135355359502, "learning_rate": 9.573249108973281e-08, "loss": 0.0526, "step": 10324 }, { "epoch": 2.82, "grad_norm": 1.427146965064548, "learning_rate": 9.544570914835128e-08, "loss": 0.0397, "step": 10325 }, { "epoch": 2.82, "grad_norm": 1.5062534836147499, "learning_rate": 9.51593532626538e-08, "loss": 0.0507, "step": 10326 }, { "epoch": 2.82, "grad_norm": 1.5246331564463163, "learning_rate": 9.48734234575166e-08, "loss": 0.0436, "step": 10327 }, { "epoch": 2.82, "grad_norm": 1.6828932792811744, "learning_rate": 9.45879197577787e-08, "loss": 0.0506, "step": 10328 }, { "epoch": 2.82, "grad_norm": 1.4481151660985787, "learning_rate": 9.430284218824026e-08, "loss": 0.0436, "step": 10329 }, { "epoch": 2.82, "grad_norm": 1.442306868238276, "learning_rate": 9.401819077366648e-08, "loss": 0.0432, "step": 10330 }, { "epoch": 2.82, "grad_norm": 1.284158726469798, "learning_rate": 9.373396553878533e-08, "loss": 0.0345, "step": 10331 }, { "epoch": 2.82, "grad_norm": 1.57870026596695, "learning_rate": 9.345016650828598e-08, "loss": 0.0444, "step": 10332 }, { "epoch": 2.82, "grad_norm": 1.4688552879807097, "learning_rate": 9.316679370682368e-08, "loss": 0.0451, "step": 10333 }, { "epoch": 2.82, "grad_norm": 1.5659604489405252, "learning_rate": 9.288384715901377e-08, "loss": 0.0513, "step": 10334 }, { "epoch": 2.82, "grad_norm": 1.5777080898318339, "learning_rate": 9.260132688943546e-08, "loss": 0.0441, "step": 10335 }, { "epoch": 2.82, "grad_norm": 1.2774441832928285, "learning_rate": 9.231923292263134e-08, "loss": 0.0378, "step": 10336 }, { "epoch": 2.82, "grad_norm": 1.5296833588321208, "learning_rate": 9.203756528310737e-08, "loss": 0.0437, "step": 10337 }, { "epoch": 2.82, "grad_norm": 1.889128590443198, "learning_rate": 9.175632399533118e-08, "loss": 0.0516, "step": 10338 }, { "epoch": 2.82, "grad_norm": 1.6249260949307074, "learning_rate": 9.147550908373381e-08, "loss": 0.0434, "step": 10339 }, { "epoch": 2.82, "grad_norm": 1.5555338711340916, "learning_rate": 9.119512057271074e-08, "loss": 0.0496, "step": 10340 }, { "epoch": 2.82, "grad_norm": 1.1433943411402412, "learning_rate": 9.091515848661747e-08, "loss": 0.0315, "step": 10341 }, { "epoch": 2.82, "grad_norm": 1.5240935042432777, "learning_rate": 9.063562284977512e-08, "loss": 0.0441, "step": 10342 }, { "epoch": 2.82, "grad_norm": 1.614214158248411, "learning_rate": 9.035651368646647e-08, "loss": 0.0442, "step": 10343 }, { "epoch": 2.82, "grad_norm": 1.4971010822561845, "learning_rate": 9.00778310209377e-08, "loss": 0.0385, "step": 10344 }, { "epoch": 2.82, "grad_norm": 1.4149919038604564, "learning_rate": 8.979957487739832e-08, "loss": 0.0363, "step": 10345 }, { "epoch": 2.82, "grad_norm": 1.6562788039679477, "learning_rate": 8.952174528001955e-08, "loss": 0.0426, "step": 10346 }, { "epoch": 2.82, "grad_norm": 1.3299042671092614, "learning_rate": 8.924434225293654e-08, "loss": 0.0396, "step": 10347 }, { "epoch": 2.83, "grad_norm": 1.3912319361516368, "learning_rate": 8.896736582024667e-08, "loss": 0.0391, "step": 10348 }, { "epoch": 2.83, "grad_norm": 1.3235075232631488, "learning_rate": 8.869081600601126e-08, "loss": 0.0399, "step": 10349 }, { "epoch": 2.83, "grad_norm": 1.5196248668925225, "learning_rate": 8.84146928342544e-08, "loss": 0.0444, "step": 10350 }, { "epoch": 2.83, "grad_norm": 1.3314955621203368, "learning_rate": 8.813899632896194e-08, "loss": 0.0382, "step": 10351 }, { "epoch": 2.83, "grad_norm": 1.5136119482377008, "learning_rate": 8.786372651408359e-08, "loss": 0.0391, "step": 10352 }, { "epoch": 2.83, "grad_norm": 1.6113514667425146, "learning_rate": 8.758888341353189e-08, "loss": 0.0436, "step": 10353 }, { "epoch": 2.83, "grad_norm": 1.4628482667214961, "learning_rate": 8.731446705118274e-08, "loss": 0.0392, "step": 10354 }, { "epoch": 2.83, "grad_norm": 1.4541406677547066, "learning_rate": 8.704047745087429e-08, "loss": 0.0401, "step": 10355 }, { "epoch": 2.83, "grad_norm": 1.47844119824978, "learning_rate": 8.676691463640752e-08, "loss": 0.0483, "step": 10356 }, { "epoch": 2.83, "grad_norm": 1.2880468902319846, "learning_rate": 8.649377863154728e-08, "loss": 0.0375, "step": 10357 }, { "epoch": 2.83, "grad_norm": 1.7535988972256478, "learning_rate": 8.622106946002074e-08, "loss": 0.0514, "step": 10358 }, { "epoch": 2.83, "grad_norm": 1.2373807504773728, "learning_rate": 8.594878714551669e-08, "loss": 0.0333, "step": 10359 }, { "epoch": 2.83, "grad_norm": 1.3325768562989997, "learning_rate": 8.567693171168956e-08, "loss": 0.0412, "step": 10360 }, { "epoch": 2.83, "grad_norm": 1.9237394540285586, "learning_rate": 8.540550318215434e-08, "loss": 0.0618, "step": 10361 }, { "epoch": 2.83, "grad_norm": 1.6409395837992216, "learning_rate": 8.513450158049109e-08, "loss": 0.0519, "step": 10362 }, { "epoch": 2.83, "grad_norm": 1.6383801130934434, "learning_rate": 8.486392693024038e-08, "loss": 0.0493, "step": 10363 }, { "epoch": 2.83, "grad_norm": 1.512018204354824, "learning_rate": 8.459377925490786e-08, "loss": 0.05, "step": 10364 }, { "epoch": 2.83, "grad_norm": 1.4367333890512126, "learning_rate": 8.432405857795978e-08, "loss": 0.0432, "step": 10365 }, { "epoch": 2.83, "grad_norm": 1.4262197437763022, "learning_rate": 8.405476492282739e-08, "loss": 0.0418, "step": 10366 }, { "epoch": 2.83, "grad_norm": 1.3326549464357016, "learning_rate": 8.378589831290363e-08, "loss": 0.035, "step": 10367 }, { "epoch": 2.83, "grad_norm": 1.7137729101554846, "learning_rate": 8.351745877154594e-08, "loss": 0.0459, "step": 10368 }, { "epoch": 2.83, "grad_norm": 1.3902164569559086, "learning_rate": 8.324944632207288e-08, "loss": 0.0381, "step": 10369 }, { "epoch": 2.83, "grad_norm": 1.448264673910247, "learning_rate": 8.298186098776583e-08, "loss": 0.04, "step": 10370 }, { "epoch": 2.83, "grad_norm": 1.670068270432286, "learning_rate": 8.27147027918701e-08, "loss": 0.0461, "step": 10371 }, { "epoch": 2.83, "grad_norm": 1.3463922980741976, "learning_rate": 8.244797175759434e-08, "loss": 0.0342, "step": 10372 }, { "epoch": 2.83, "grad_norm": 1.386949657687647, "learning_rate": 8.218166790810833e-08, "loss": 0.0392, "step": 10373 }, { "epoch": 2.83, "grad_norm": 1.4742312064066943, "learning_rate": 8.191579126654637e-08, "loss": 0.045, "step": 10374 }, { "epoch": 2.83, "grad_norm": 1.4694893700869942, "learning_rate": 8.165034185600496e-08, "loss": 0.043, "step": 10375 }, { "epoch": 2.83, "grad_norm": 1.5876849825880603, "learning_rate": 8.138531969954289e-08, "loss": 0.0435, "step": 10376 }, { "epoch": 2.83, "grad_norm": 1.7141144862744118, "learning_rate": 8.11207248201834e-08, "loss": 0.0527, "step": 10377 }, { "epoch": 2.83, "grad_norm": 1.6802114553234544, "learning_rate": 8.08565572409109e-08, "loss": 0.0471, "step": 10378 }, { "epoch": 2.83, "grad_norm": 1.7638751949800415, "learning_rate": 8.059281698467369e-08, "loss": 0.0515, "step": 10379 }, { "epoch": 2.83, "grad_norm": 1.4159406269711217, "learning_rate": 8.032950407438289e-08, "loss": 0.0418, "step": 10380 }, { "epoch": 2.83, "grad_norm": 1.4966568094806383, "learning_rate": 8.006661853291298e-08, "loss": 0.0407, "step": 10381 }, { "epoch": 2.83, "grad_norm": 1.381330016620923, "learning_rate": 7.980416038309902e-08, "loss": 0.0408, "step": 10382 }, { "epoch": 2.83, "grad_norm": 1.31171745013247, "learning_rate": 7.954212964774166e-08, "loss": 0.0362, "step": 10383 }, { "epoch": 2.83, "grad_norm": 1.2703144126665844, "learning_rate": 7.928052634960382e-08, "loss": 0.0431, "step": 10384 }, { "epoch": 2.84, "grad_norm": 1.4227961100887532, "learning_rate": 7.901935051140952e-08, "loss": 0.0478, "step": 10385 }, { "epoch": 2.84, "grad_norm": 1.7906918875077478, "learning_rate": 7.875860215584841e-08, "loss": 0.0431, "step": 10386 }, { "epoch": 2.84, "grad_norm": 1.617959422979846, "learning_rate": 7.849828130557013e-08, "loss": 0.039, "step": 10387 }, { "epoch": 2.84, "grad_norm": 1.3952306771581602, "learning_rate": 7.823838798318995e-08, "loss": 0.0413, "step": 10388 }, { "epoch": 2.84, "grad_norm": 1.3842298734069025, "learning_rate": 7.797892221128311e-08, "loss": 0.0379, "step": 10389 }, { "epoch": 2.84, "grad_norm": 1.4615336826799024, "learning_rate": 7.77198840123905e-08, "loss": 0.0464, "step": 10390 }, { "epoch": 2.84, "grad_norm": 1.5553569264543106, "learning_rate": 7.746127340901411e-08, "loss": 0.0487, "step": 10391 }, { "epoch": 2.84, "grad_norm": 1.3813967362153239, "learning_rate": 7.720309042361984e-08, "loss": 0.0381, "step": 10392 }, { "epoch": 2.84, "grad_norm": 1.567256096422835, "learning_rate": 7.694533507863477e-08, "loss": 0.0458, "step": 10393 }, { "epoch": 2.84, "grad_norm": 1.241073047242047, "learning_rate": 7.6688007396451e-08, "loss": 0.0426, "step": 10394 }, { "epoch": 2.84, "grad_norm": 1.4485734996317654, "learning_rate": 7.643110739942172e-08, "loss": 0.0453, "step": 10395 }, { "epoch": 2.84, "grad_norm": 1.5640042434759203, "learning_rate": 7.617463510986466e-08, "loss": 0.0495, "step": 10396 }, { "epoch": 2.84, "grad_norm": 1.554320891888565, "learning_rate": 7.591859055005813e-08, "loss": 0.0455, "step": 10397 }, { "epoch": 2.84, "grad_norm": 1.3364329243032316, "learning_rate": 7.56629737422454e-08, "loss": 0.0388, "step": 10398 }, { "epoch": 2.84, "grad_norm": 1.536956510138542, "learning_rate": 7.540778470863153e-08, "loss": 0.0465, "step": 10399 }, { "epoch": 2.84, "grad_norm": 1.4399696994715812, "learning_rate": 7.515302347138486e-08, "loss": 0.0466, "step": 10400 }, { "epoch": 2.84, "grad_norm": 1.4785707142507865, "learning_rate": 7.48986900526355e-08, "loss": 0.046, "step": 10401 }, { "epoch": 2.84, "grad_norm": 1.2844883705836427, "learning_rate": 7.464478447447854e-08, "loss": 0.0356, "step": 10402 }, { "epoch": 2.84, "grad_norm": 1.6988691429939067, "learning_rate": 7.439130675896966e-08, "loss": 0.0443, "step": 10403 }, { "epoch": 2.84, "grad_norm": 1.3681141683345532, "learning_rate": 7.413825692812848e-08, "loss": 0.0405, "step": 10404 }, { "epoch": 2.84, "grad_norm": 1.5333713080712394, "learning_rate": 7.388563500393742e-08, "loss": 0.0444, "step": 10405 }, { "epoch": 2.84, "grad_norm": 1.4071943098207074, "learning_rate": 7.363344100834225e-08, "loss": 0.0416, "step": 10406 }, { "epoch": 2.84, "grad_norm": 1.6049639714903283, "learning_rate": 7.338167496324933e-08, "loss": 0.0451, "step": 10407 }, { "epoch": 2.84, "grad_norm": 1.4602801919560122, "learning_rate": 7.313033689053061e-08, "loss": 0.0422, "step": 10408 }, { "epoch": 2.84, "grad_norm": 1.4583022540475874, "learning_rate": 7.287942681201921e-08, "loss": 0.043, "step": 10409 }, { "epoch": 2.84, "grad_norm": 1.6096720733180037, "learning_rate": 7.26289447495121e-08, "loss": 0.05, "step": 10410 }, { "epoch": 2.84, "grad_norm": 1.3400683299466933, "learning_rate": 7.237889072476856e-08, "loss": 0.0436, "step": 10411 }, { "epoch": 2.84, "grad_norm": 1.5331925092574545, "learning_rate": 7.212926475950954e-08, "loss": 0.0475, "step": 10412 }, { "epoch": 2.84, "grad_norm": 1.3190404757539123, "learning_rate": 7.188006687542048e-08, "loss": 0.0403, "step": 10413 }, { "epoch": 2.84, "grad_norm": 1.464292288372917, "learning_rate": 7.163129709414962e-08, "loss": 0.0444, "step": 10414 }, { "epoch": 2.84, "grad_norm": 1.361384891605114, "learning_rate": 7.138295543730634e-08, "loss": 0.0376, "step": 10415 }, { "epoch": 2.84, "grad_norm": 1.4097894002658897, "learning_rate": 7.113504192646503e-08, "loss": 0.0471, "step": 10416 }, { "epoch": 2.84, "grad_norm": 1.410768994742681, "learning_rate": 7.088755658316127e-08, "loss": 0.0427, "step": 10417 }, { "epoch": 2.84, "grad_norm": 1.4381431534130324, "learning_rate": 7.064049942889395e-08, "loss": 0.0445, "step": 10418 }, { "epoch": 2.84, "grad_norm": 1.5946808364596332, "learning_rate": 7.03938704851248e-08, "loss": 0.0441, "step": 10419 }, { "epoch": 2.84, "grad_norm": 1.3293688540349755, "learning_rate": 7.014766977327836e-08, "loss": 0.0392, "step": 10420 }, { "epoch": 2.84, "grad_norm": 1.3937211122060589, "learning_rate": 6.990189731474195e-08, "loss": 0.04, "step": 10421 }, { "epoch": 2.85, "grad_norm": 1.7143530019462123, "learning_rate": 6.965655313086572e-08, "loss": 0.0521, "step": 10422 }, { "epoch": 2.85, "grad_norm": 1.4004485772801822, "learning_rate": 6.941163724296263e-08, "loss": 0.0387, "step": 10423 }, { "epoch": 2.85, "grad_norm": 1.5712393627263477, "learning_rate": 6.916714967230786e-08, "loss": 0.0439, "step": 10424 }, { "epoch": 2.85, "grad_norm": 1.3161319352546244, "learning_rate": 6.892309044014056e-08, "loss": 0.0354, "step": 10425 }, { "epoch": 2.85, "grad_norm": 1.5744980343029722, "learning_rate": 6.867945956766154e-08, "loss": 0.0444, "step": 10426 }, { "epoch": 2.85, "grad_norm": 1.3715513200862968, "learning_rate": 6.843625707603496e-08, "loss": 0.0408, "step": 10427 }, { "epoch": 2.85, "grad_norm": 1.4724034197542435, "learning_rate": 6.819348298638839e-08, "loss": 0.045, "step": 10428 }, { "epoch": 2.85, "grad_norm": 1.6140175253905769, "learning_rate": 6.795113731981052e-08, "loss": 0.0482, "step": 10429 }, { "epoch": 2.85, "grad_norm": 1.355919530318479, "learning_rate": 6.770922009735392e-08, "loss": 0.0403, "step": 10430 }, { "epoch": 2.85, "grad_norm": 1.2826238635166742, "learning_rate": 6.746773134003404e-08, "loss": 0.0357, "step": 10431 }, { "epoch": 2.85, "grad_norm": 1.4886024664071593, "learning_rate": 6.722667106882907e-08, "loss": 0.0442, "step": 10432 }, { "epoch": 2.85, "grad_norm": 1.7302810325645976, "learning_rate": 6.698603930467951e-08, "loss": 0.0454, "step": 10433 }, { "epoch": 2.85, "grad_norm": 1.7181990315340228, "learning_rate": 6.674583606848862e-08, "loss": 0.0574, "step": 10434 }, { "epoch": 2.85, "grad_norm": 1.489644584243883, "learning_rate": 6.650606138112358e-08, "loss": 0.0395, "step": 10435 }, { "epoch": 2.85, "grad_norm": 1.317091627112999, "learning_rate": 6.626671526341222e-08, "loss": 0.0381, "step": 10436 }, { "epoch": 2.85, "grad_norm": 1.49765090451203, "learning_rate": 6.602779773614731e-08, "loss": 0.0397, "step": 10437 }, { "epoch": 2.85, "grad_norm": 1.4742928506958795, "learning_rate": 6.578930882008283e-08, "loss": 0.0378, "step": 10438 }, { "epoch": 2.85, "grad_norm": 1.6911730204639146, "learning_rate": 6.555124853593719e-08, "loss": 0.0522, "step": 10439 }, { "epoch": 2.85, "grad_norm": 1.3221733205049593, "learning_rate": 6.531361690438942e-08, "loss": 0.0366, "step": 10440 }, { "epoch": 2.85, "grad_norm": 1.6437206403539906, "learning_rate": 6.507641394608355e-08, "loss": 0.0514, "step": 10441 }, { "epoch": 2.85, "grad_norm": 1.41723331166098, "learning_rate": 6.483963968162421e-08, "loss": 0.0411, "step": 10442 }, { "epoch": 2.85, "grad_norm": 1.7832281710388262, "learning_rate": 6.460329413157996e-08, "loss": 0.0519, "step": 10443 }, { "epoch": 2.85, "grad_norm": 1.5145694967997823, "learning_rate": 6.436737731648268e-08, "loss": 0.0449, "step": 10444 }, { "epoch": 2.85, "grad_norm": 1.514990966274188, "learning_rate": 6.4131889256826e-08, "loss": 0.0442, "step": 10445 }, { "epoch": 2.85, "grad_norm": 1.5178262509714926, "learning_rate": 6.389682997306689e-08, "loss": 0.0367, "step": 10446 }, { "epoch": 2.85, "grad_norm": 1.4268360447473367, "learning_rate": 6.366219948562402e-08, "loss": 0.0406, "step": 10447 }, { "epoch": 2.85, "grad_norm": 1.6302697018110215, "learning_rate": 6.342799781487997e-08, "loss": 0.0501, "step": 10448 }, { "epoch": 2.85, "grad_norm": 1.7688867703098903, "learning_rate": 6.31942249811801e-08, "loss": 0.0491, "step": 10449 }, { "epoch": 2.85, "grad_norm": 3.5442890796211093, "learning_rate": 6.296088100483155e-08, "loss": 0.0745, "step": 10450 }, { "epoch": 2.85, "grad_norm": 1.5245711706203575, "learning_rate": 6.27279659061053e-08, "loss": 0.0401, "step": 10451 }, { "epoch": 2.85, "grad_norm": 1.4076630845364102, "learning_rate": 6.249547970523407e-08, "loss": 0.046, "step": 10452 }, { "epoch": 2.85, "grad_norm": 1.4982870536977921, "learning_rate": 6.22634224224139e-08, "loss": 0.0511, "step": 10453 }, { "epoch": 2.85, "grad_norm": 1.5676202407555635, "learning_rate": 6.203179407780368e-08, "loss": 0.042, "step": 10454 }, { "epoch": 2.85, "grad_norm": 1.3329055305244886, "learning_rate": 6.18005946915251e-08, "loss": 0.0373, "step": 10455 }, { "epoch": 2.85, "grad_norm": 1.2716843715746093, "learning_rate": 6.156982428366154e-08, "loss": 0.0386, "step": 10456 }, { "epoch": 2.85, "grad_norm": 1.4250545852685859, "learning_rate": 6.133948287426028e-08, "loss": 0.0399, "step": 10457 }, { "epoch": 2.86, "grad_norm": 1.3659099017277354, "learning_rate": 6.110957048333088e-08, "loss": 0.043, "step": 10458 }, { "epoch": 2.86, "grad_norm": 1.5264272316243634, "learning_rate": 6.088008713084626e-08, "loss": 0.0469, "step": 10459 }, { "epoch": 2.86, "grad_norm": 1.586533580149427, "learning_rate": 6.065103283674045e-08, "loss": 0.04, "step": 10460 }, { "epoch": 2.86, "grad_norm": 1.3908812825700378, "learning_rate": 6.0422407620912e-08, "loss": 0.0398, "step": 10461 }, { "epoch": 2.86, "grad_norm": 1.4789001012786605, "learning_rate": 6.019421150322114e-08, "loss": 0.0457, "step": 10462 }, { "epoch": 2.86, "grad_norm": 1.3846931614588163, "learning_rate": 5.996644450349142e-08, "loss": 0.0346, "step": 10463 }, { "epoch": 2.86, "grad_norm": 1.7817040833377382, "learning_rate": 5.973910664150818e-08, "loss": 0.0502, "step": 10464 }, { "epoch": 2.86, "grad_norm": 1.3354937827456324, "learning_rate": 5.951219793702112e-08, "loss": 0.036, "step": 10465 }, { "epoch": 2.86, "grad_norm": 1.6107870625726324, "learning_rate": 5.928571840974062e-08, "loss": 0.0445, "step": 10466 }, { "epoch": 2.86, "grad_norm": 1.7054474609660477, "learning_rate": 5.905966807934205e-08, "loss": 0.0436, "step": 10467 }, { "epoch": 2.86, "grad_norm": 1.5140400889771781, "learning_rate": 5.88340469654608e-08, "loss": 0.0443, "step": 10468 }, { "epoch": 2.86, "grad_norm": 1.6091141052733897, "learning_rate": 5.8608855087697314e-08, "loss": 0.0482, "step": 10469 }, { "epoch": 2.86, "grad_norm": 1.4973352005753136, "learning_rate": 5.8384092465614274e-08, "loss": 0.0465, "step": 10470 }, { "epoch": 2.86, "grad_norm": 1.4628731345895611, "learning_rate": 5.815975911873606e-08, "loss": 0.0435, "step": 10471 }, { "epoch": 2.86, "grad_norm": 1.6299086228498854, "learning_rate": 5.7935855066549863e-08, "loss": 0.0569, "step": 10472 }, { "epoch": 2.86, "grad_norm": 1.390736585950677, "learning_rate": 5.771238032850679e-08, "loss": 0.0394, "step": 10473 }, { "epoch": 2.86, "grad_norm": 1.4745681489958136, "learning_rate": 5.74893349240202e-08, "loss": 0.0421, "step": 10474 }, { "epoch": 2.86, "grad_norm": 1.4167210859699482, "learning_rate": 5.726671887246515e-08, "loss": 0.0459, "step": 10475 }, { "epoch": 2.86, "grad_norm": 1.60975529460272, "learning_rate": 5.704453219318118e-08, "loss": 0.0497, "step": 10476 }, { "epoch": 2.86, "grad_norm": 1.7771808830047289, "learning_rate": 5.682277490546839e-08, "loss": 0.0515, "step": 10477 }, { "epoch": 2.86, "grad_norm": 1.6642573367482645, "learning_rate": 5.6601447028591384e-08, "loss": 0.0503, "step": 10478 }, { "epoch": 2.86, "grad_norm": 1.4304031850761514, "learning_rate": 5.638054858177644e-08, "loss": 0.0421, "step": 10479 }, { "epoch": 2.86, "grad_norm": 1.6052916801962822, "learning_rate": 5.616007958421321e-08, "loss": 0.0536, "step": 10480 }, { "epoch": 2.86, "grad_norm": 1.3406679097447967, "learning_rate": 5.5940040055053604e-08, "loss": 0.041, "step": 10481 }, { "epoch": 2.86, "grad_norm": 1.7884976405084136, "learning_rate": 5.572043001341232e-08, "loss": 0.0508, "step": 10482 }, { "epoch": 2.86, "grad_norm": 1.5050543149581124, "learning_rate": 5.550124947836688e-08, "loss": 0.0405, "step": 10483 }, { "epoch": 2.86, "grad_norm": 1.299849223166704, "learning_rate": 5.5282498468957056e-08, "loss": 0.0352, "step": 10484 }, { "epoch": 2.86, "grad_norm": 1.7670370218793927, "learning_rate": 5.5064177004185424e-08, "loss": 0.0558, "step": 10485 }, { "epoch": 2.86, "grad_norm": 1.368751530837964, "learning_rate": 5.484628510301793e-08, "loss": 0.0417, "step": 10486 }, { "epoch": 2.86, "grad_norm": 1.5111358559904722, "learning_rate": 5.462882278438275e-08, "loss": 0.0466, "step": 10487 }, { "epoch": 2.86, "grad_norm": 1.5182220647113223, "learning_rate": 5.4411790067170345e-08, "loss": 0.0423, "step": 10488 }, { "epoch": 2.86, "grad_norm": 1.5593558009746167, "learning_rate": 5.4195186970234514e-08, "loss": 0.0516, "step": 10489 }, { "epoch": 2.86, "grad_norm": 1.5236994174620118, "learning_rate": 5.397901351239077e-08, "loss": 0.0414, "step": 10490 }, { "epoch": 2.86, "grad_norm": 1.4920153706616395, "learning_rate": 5.3763269712419076e-08, "loss": 0.0419, "step": 10491 }, { "epoch": 2.86, "grad_norm": 1.6839827935738112, "learning_rate": 5.354795558906001e-08, "loss": 0.0431, "step": 10492 }, { "epoch": 2.86, "grad_norm": 1.4922375139022215, "learning_rate": 5.333307116101804e-08, "loss": 0.0507, "step": 10493 }, { "epoch": 2.86, "grad_norm": 1.2345045699868673, "learning_rate": 5.3118616446960484e-08, "loss": 0.0378, "step": 10494 }, { "epoch": 2.87, "grad_norm": 1.387778502893405, "learning_rate": 5.2904591465516855e-08, "loss": 0.0407, "step": 10495 }, { "epoch": 2.87, "grad_norm": 1.4615672710949077, "learning_rate": 5.26909962352784e-08, "loss": 0.0412, "step": 10496 }, { "epoch": 2.87, "grad_norm": 1.5345797616087047, "learning_rate": 5.24778307748014e-08, "loss": 0.0437, "step": 10497 }, { "epoch": 2.87, "grad_norm": 1.392118549458176, "learning_rate": 5.226509510260214e-08, "loss": 0.0435, "step": 10498 }, { "epoch": 2.87, "grad_norm": 1.56025630476497, "learning_rate": 5.2052789237161395e-08, "loss": 0.0437, "step": 10499 }, { "epoch": 2.87, "grad_norm": 1.3696839715655833, "learning_rate": 5.184091319692219e-08, "loss": 0.0408, "step": 10500 }, { "epoch": 2.87, "grad_norm": 1.436605168578194, "learning_rate": 5.1629467000290365e-08, "loss": 0.0402, "step": 10501 }, { "epoch": 2.87, "grad_norm": 1.3792406807047526, "learning_rate": 5.1418450665633445e-08, "loss": 0.0411, "step": 10502 }, { "epoch": 2.87, "grad_norm": 1.39238645674009, "learning_rate": 5.1207864211282324e-08, "loss": 0.0364, "step": 10503 }, { "epoch": 2.87, "grad_norm": 1.613105318669477, "learning_rate": 5.099770765553069e-08, "loss": 0.0453, "step": 10504 }, { "epoch": 2.87, "grad_norm": 1.3801880881184851, "learning_rate": 5.078798101663507e-08, "loss": 0.0418, "step": 10505 }, { "epoch": 2.87, "grad_norm": 1.1254802287805858, "learning_rate": 5.057868431281421e-08, "loss": 0.0309, "step": 10506 }, { "epoch": 2.87, "grad_norm": 1.4453504735243323, "learning_rate": 5.0369817562249126e-08, "loss": 0.0407, "step": 10507 }, { "epoch": 2.87, "grad_norm": 1.625518649218529, "learning_rate": 5.016138078308364e-08, "loss": 0.0534, "step": 10508 }, { "epoch": 2.87, "grad_norm": 1.4075947882287927, "learning_rate": 4.9953373993426036e-08, "loss": 0.04, "step": 10509 }, { "epoch": 2.87, "grad_norm": 1.4844752833913288, "learning_rate": 4.9745797211344096e-08, "loss": 0.0387, "step": 10510 }, { "epoch": 2.87, "grad_norm": 1.3116317884502549, "learning_rate": 4.953865045487061e-08, "loss": 0.0394, "step": 10511 }, { "epoch": 2.87, "grad_norm": 1.4743315774940526, "learning_rate": 4.9331933742000627e-08, "loss": 0.044, "step": 10512 }, { "epoch": 2.87, "grad_norm": 1.390183659734006, "learning_rate": 4.91256470906909e-08, "loss": 0.035, "step": 10513 }, { "epoch": 2.87, "grad_norm": 1.6374208737038816, "learning_rate": 4.891979051886153e-08, "loss": 0.0517, "step": 10514 }, { "epoch": 2.87, "grad_norm": 1.3329173715421005, "learning_rate": 4.8714364044396e-08, "loss": 0.0397, "step": 10515 }, { "epoch": 2.87, "grad_norm": 1.4074261514127087, "learning_rate": 4.850936768513781e-08, "loss": 0.036, "step": 10516 }, { "epoch": 2.87, "grad_norm": 1.4525680209089034, "learning_rate": 4.83048014588966e-08, "loss": 0.0413, "step": 10517 }, { "epoch": 2.87, "grad_norm": 1.5016630148150008, "learning_rate": 4.81006653834426e-08, "loss": 0.0385, "step": 10518 }, { "epoch": 2.87, "grad_norm": 1.3765627716657398, "learning_rate": 4.7896959476508296e-08, "loss": 0.0375, "step": 10519 }, { "epoch": 2.87, "grad_norm": 1.9114280290934447, "learning_rate": 4.7693683755788975e-08, "loss": 0.0427, "step": 10520 }, { "epoch": 2.87, "grad_norm": 1.595345851984939, "learning_rate": 4.7490838238944957e-08, "loss": 0.0465, "step": 10521 }, { "epoch": 2.87, "grad_norm": 1.6092765283419799, "learning_rate": 4.7288422943596035e-08, "loss": 0.045, "step": 10522 }, { "epoch": 2.87, "grad_norm": 1.36869387181434, "learning_rate": 4.708643788732592e-08, "loss": 0.0432, "step": 10523 }, { "epoch": 2.87, "grad_norm": 1.5157135052333635, "learning_rate": 4.6884883087681686e-08, "loss": 0.0371, "step": 10524 }, { "epoch": 2.87, "grad_norm": 1.3869862708440757, "learning_rate": 4.668375856217156e-08, "loss": 0.0398, "step": 10525 }, { "epoch": 2.87, "grad_norm": 1.7007158361387813, "learning_rate": 4.64830643282671e-08, "loss": 0.0326, "step": 10526 }, { "epoch": 2.87, "grad_norm": 1.885647406751286, "learning_rate": 4.628280040340272e-08, "loss": 0.0516, "step": 10527 }, { "epoch": 2.87, "grad_norm": 1.6061252363205898, "learning_rate": 4.608296680497559e-08, "loss": 0.0452, "step": 10528 }, { "epoch": 2.87, "grad_norm": 1.4256128240117376, "learning_rate": 4.588356355034462e-08, "loss": 0.0499, "step": 10529 }, { "epoch": 2.87, "grad_norm": 1.5792852714537684, "learning_rate": 4.568459065683206e-08, "loss": 0.0449, "step": 10530 }, { "epoch": 2.87, "grad_norm": 1.500084858103287, "learning_rate": 4.5486048141721863e-08, "loss": 0.0508, "step": 10531 }, { "epoch": 2.88, "grad_norm": 1.3541734735358653, "learning_rate": 4.528793602226245e-08, "loss": 0.0411, "step": 10532 }, { "epoch": 2.88, "grad_norm": 1.4951113871859114, "learning_rate": 4.509025431566283e-08, "loss": 0.0419, "step": 10533 }, { "epoch": 2.88, "grad_norm": 1.7373534476859782, "learning_rate": 4.4893003039096494e-08, "loss": 0.0474, "step": 10534 }, { "epoch": 2.88, "grad_norm": 1.483227027649943, "learning_rate": 4.4696182209697515e-08, "loss": 0.0431, "step": 10535 }, { "epoch": 2.88, "grad_norm": 1.4612562224454717, "learning_rate": 4.449979184456388e-08, "loss": 0.043, "step": 10536 }, { "epoch": 2.88, "grad_norm": 1.4390181830254363, "learning_rate": 4.4303831960756385e-08, "loss": 0.0397, "step": 10537 }, { "epoch": 2.88, "grad_norm": 1.3125925501348044, "learning_rate": 4.410830257529752e-08, "loss": 0.0399, "step": 10538 }, { "epoch": 2.88, "grad_norm": 1.6130170533053534, "learning_rate": 4.391320370517205e-08, "loss": 0.0491, "step": 10539 }, { "epoch": 2.88, "grad_norm": 1.535329865789914, "learning_rate": 4.371853536732973e-08, "loss": 0.0454, "step": 10540 }, { "epoch": 2.88, "grad_norm": 1.4742641489493817, "learning_rate": 4.3524297578680375e-08, "loss": 0.0435, "step": 10541 }, { "epoch": 2.88, "grad_norm": 1.6370064759300502, "learning_rate": 4.333049035609715e-08, "loss": 0.0383, "step": 10542 }, { "epoch": 2.88, "grad_norm": 1.4106480480866852, "learning_rate": 4.3137113716416044e-08, "loss": 0.0386, "step": 10543 }, { "epoch": 2.88, "grad_norm": 1.433474046013542, "learning_rate": 4.294416767643639e-08, "loss": 0.0378, "step": 10544 }, { "epoch": 2.88, "grad_norm": 1.217668720277976, "learning_rate": 4.275165225291755e-08, "loss": 0.0386, "step": 10545 }, { "epoch": 2.88, "grad_norm": 1.229271611677372, "learning_rate": 4.255956746258505e-08, "loss": 0.0376, "step": 10546 }, { "epoch": 2.88, "grad_norm": 1.4680991228486628, "learning_rate": 4.236791332212498e-08, "loss": 0.0441, "step": 10547 }, { "epoch": 2.88, "grad_norm": 1.4890777102554775, "learning_rate": 4.217668984818513e-08, "loss": 0.0382, "step": 10548 }, { "epoch": 2.88, "grad_norm": 1.307591590139095, "learning_rate": 4.198589705737721e-08, "loss": 0.0425, "step": 10549 }, { "epoch": 2.88, "grad_norm": 1.4639753207858603, "learning_rate": 4.1795534966275754e-08, "loss": 0.0457, "step": 10550 }, { "epoch": 2.88, "grad_norm": 1.2184259958529033, "learning_rate": 4.1605603591416964e-08, "loss": 0.0392, "step": 10551 }, { "epoch": 2.88, "grad_norm": 1.4344470733187191, "learning_rate": 4.141610294930043e-08, "loss": 0.0429, "step": 10552 }, { "epoch": 2.88, "grad_norm": 1.3010250129450645, "learning_rate": 4.1227033056388535e-08, "loss": 0.0403, "step": 10553 }, { "epoch": 2.88, "grad_norm": 1.600697120343962, "learning_rate": 4.103839392910425e-08, "loss": 0.0449, "step": 10554 }, { "epoch": 2.88, "grad_norm": 1.4712729959667414, "learning_rate": 4.085018558383558e-08, "loss": 0.0384, "step": 10555 }, { "epoch": 2.88, "grad_norm": 1.4646342061481719, "learning_rate": 4.0662408036931664e-08, "loss": 0.0419, "step": 10556 }, { "epoch": 2.88, "grad_norm": 1.468054644516981, "learning_rate": 4.0475061304704465e-08, "loss": 0.0389, "step": 10557 }, { "epoch": 2.88, "grad_norm": 1.6829702234847743, "learning_rate": 4.028814540342985e-08, "loss": 0.0473, "step": 10558 }, { "epoch": 2.88, "grad_norm": 1.595935391284207, "learning_rate": 4.0101660349343706e-08, "loss": 0.0509, "step": 10559 }, { "epoch": 2.88, "grad_norm": 1.3820917856356962, "learning_rate": 3.991560615864587e-08, "loss": 0.0296, "step": 10560 }, { "epoch": 2.88, "grad_norm": 1.4516269972202702, "learning_rate": 3.972998284749952e-08, "loss": 0.0449, "step": 10561 }, { "epoch": 2.88, "grad_norm": 1.4199581421938772, "learning_rate": 3.9544790432029526e-08, "loss": 0.0395, "step": 10562 }, { "epoch": 2.88, "grad_norm": 1.4408793517747758, "learning_rate": 3.936002892832302e-08, "loss": 0.0431, "step": 10563 }, { "epoch": 2.88, "grad_norm": 1.4950793659775383, "learning_rate": 3.917569835243107e-08, "loss": 0.0456, "step": 10564 }, { "epoch": 2.88, "grad_norm": 1.3451696835761318, "learning_rate": 3.8991798720365296e-08, "loss": 0.0391, "step": 10565 }, { "epoch": 2.88, "grad_norm": 1.6242160414864777, "learning_rate": 3.880833004810125e-08, "loss": 0.0498, "step": 10566 }, { "epoch": 2.88, "grad_norm": 1.5004353431183932, "learning_rate": 3.862529235157675e-08, "loss": 0.0452, "step": 10567 }, { "epoch": 2.89, "grad_norm": 1.4781423286236863, "learning_rate": 3.84426856466924e-08, "loss": 0.0407, "step": 10568 }, { "epoch": 2.89, "grad_norm": 1.500867271292968, "learning_rate": 3.826050994931052e-08, "loss": 0.0429, "step": 10569 }, { "epoch": 2.89, "grad_norm": 1.4592679869171348, "learning_rate": 3.807876527525789e-08, "loss": 0.0387, "step": 10570 }, { "epoch": 2.89, "grad_norm": 1.6942215312920208, "learning_rate": 3.7897451640321326e-08, "loss": 0.0488, "step": 10571 }, { "epoch": 2.89, "grad_norm": 1.5777491719668757, "learning_rate": 3.771656906025212e-08, "loss": 0.0516, "step": 10572 }, { "epoch": 2.89, "grad_norm": 1.3124044562305939, "learning_rate": 3.753611755076269e-08, "loss": 0.0405, "step": 10573 }, { "epoch": 2.89, "grad_norm": 1.4259480081810934, "learning_rate": 3.7356097127529414e-08, "loss": 0.0369, "step": 10574 }, { "epoch": 2.89, "grad_norm": 1.351997642312581, "learning_rate": 3.717650780619031e-08, "loss": 0.0466, "step": 10575 }, { "epoch": 2.89, "grad_norm": 1.6707054177671112, "learning_rate": 3.6997349602346244e-08, "loss": 0.0433, "step": 10576 }, { "epoch": 2.89, "grad_norm": 1.4839038870689747, "learning_rate": 3.681862253156088e-08, "loss": 0.0471, "step": 10577 }, { "epoch": 2.89, "grad_norm": 1.421182300195976, "learning_rate": 3.6640326609359566e-08, "loss": 0.0448, "step": 10578 }, { "epoch": 2.89, "grad_norm": 1.3548519537328862, "learning_rate": 3.646246185123103e-08, "loss": 0.0381, "step": 10579 }, { "epoch": 2.89, "grad_norm": 1.6092962307539156, "learning_rate": 3.6285028272626255e-08, "loss": 0.0461, "step": 10580 }, { "epoch": 2.89, "grad_norm": 1.372974395435489, "learning_rate": 3.610802588895845e-08, "loss": 0.0408, "step": 10581 }, { "epoch": 2.89, "grad_norm": 1.5017275799961416, "learning_rate": 3.593145471560477e-08, "loss": 0.0454, "step": 10582 }, { "epoch": 2.89, "grad_norm": 1.4930879491599516, "learning_rate": 3.575531476790295e-08, "loss": 0.042, "step": 10583 }, { "epoch": 2.89, "grad_norm": 1.408209697329299, "learning_rate": 3.5579606061154626e-08, "loss": 0.0402, "step": 10584 }, { "epoch": 2.89, "grad_norm": 1.6603268217118023, "learning_rate": 3.5404328610622593e-08, "loss": 0.0437, "step": 10585 }, { "epoch": 2.89, "grad_norm": 1.725128325057117, "learning_rate": 3.522948243153412e-08, "loss": 0.0478, "step": 10586 }, { "epoch": 2.89, "grad_norm": 1.4743678820373143, "learning_rate": 3.505506753907761e-08, "loss": 0.0414, "step": 10587 }, { "epoch": 2.89, "grad_norm": 1.4854398040055226, "learning_rate": 3.488108394840428e-08, "loss": 0.0366, "step": 10588 }, { "epoch": 2.89, "grad_norm": 1.2474605478670773, "learning_rate": 3.470753167462815e-08, "loss": 0.0354, "step": 10589 }, { "epoch": 2.89, "grad_norm": 1.5363711041663948, "learning_rate": 3.4534410732825485e-08, "loss": 0.0489, "step": 10590 }, { "epoch": 2.89, "grad_norm": 1.3012499187284288, "learning_rate": 3.4361721138035375e-08, "loss": 0.0404, "step": 10591 }, { "epoch": 2.89, "grad_norm": 1.627693139689471, "learning_rate": 3.4189462905259154e-08, "loss": 0.0458, "step": 10592 }, { "epoch": 2.89, "grad_norm": 1.3848549171136908, "learning_rate": 3.40176360494604e-08, "loss": 0.0405, "step": 10593 }, { "epoch": 2.89, "grad_norm": 1.4540320628321397, "learning_rate": 3.3846240585566074e-08, "loss": 0.0369, "step": 10594 }, { "epoch": 2.89, "grad_norm": 1.3811105194800495, "learning_rate": 3.367527652846536e-08, "loss": 0.0405, "step": 10595 }, { "epoch": 2.89, "grad_norm": 1.47851606732552, "learning_rate": 3.3504743893009726e-08, "loss": 0.0406, "step": 10596 }, { "epoch": 2.89, "grad_norm": 1.307038979217443, "learning_rate": 3.333464269401232e-08, "loss": 0.0339, "step": 10597 }, { "epoch": 2.89, "grad_norm": 1.482110737868351, "learning_rate": 3.316497294625132e-08, "loss": 0.0402, "step": 10598 }, { "epoch": 2.89, "grad_norm": 1.5565306127406477, "learning_rate": 3.2995734664464373e-08, "loss": 0.0467, "step": 10599 }, { "epoch": 2.89, "grad_norm": 1.4052942799305057, "learning_rate": 3.2826927863354174e-08, "loss": 0.0348, "step": 10600 }, { "epoch": 2.89, "grad_norm": 1.4495009220449029, "learning_rate": 3.2658552557583986e-08, "loss": 0.0414, "step": 10601 }, { "epoch": 2.89, "grad_norm": 1.2496892731146683, "learning_rate": 3.249060876178156e-08, "loss": 0.032, "step": 10602 }, { "epoch": 2.89, "grad_norm": 1.3961382752405715, "learning_rate": 3.232309649053467e-08, "loss": 0.0407, "step": 10603 }, { "epoch": 2.89, "grad_norm": 1.6402632661595828, "learning_rate": 3.2156015758396106e-08, "loss": 0.0459, "step": 10604 }, { "epoch": 2.9, "grad_norm": 1.337580064108498, "learning_rate": 3.198936657987928e-08, "loss": 0.0357, "step": 10605 }, { "epoch": 2.9, "grad_norm": 1.3446132601068361, "learning_rate": 3.182314896946204e-08, "loss": 0.0387, "step": 10606 }, { "epoch": 2.9, "grad_norm": 1.43150128901666, "learning_rate": 3.165736294158228e-08, "loss": 0.0418, "step": 10607 }, { "epoch": 2.9, "grad_norm": 1.4174040032684947, "learning_rate": 3.1492008510642935e-08, "loss": 0.0351, "step": 10608 }, { "epoch": 2.9, "grad_norm": 1.405948665860412, "learning_rate": 3.1327085691006954e-08, "loss": 0.0447, "step": 10609 }, { "epoch": 2.9, "grad_norm": 1.1744522397206334, "learning_rate": 3.116259449700232e-08, "loss": 0.0374, "step": 10610 }, { "epoch": 2.9, "grad_norm": 1.4198323465996756, "learning_rate": 3.09985349429176e-08, "loss": 0.0401, "step": 10611 }, { "epoch": 2.9, "grad_norm": 1.3802792528284662, "learning_rate": 3.083490704300529e-08, "loss": 0.0446, "step": 10612 }, { "epoch": 2.9, "grad_norm": 1.259705200914064, "learning_rate": 3.067171081147846e-08, "loss": 0.035, "step": 10613 }, { "epoch": 2.9, "grad_norm": 1.8744874348942742, "learning_rate": 3.050894626251466e-08, "loss": 0.0509, "step": 10614 }, { "epoch": 2.9, "grad_norm": 1.7165307269300245, "learning_rate": 3.034661341025258e-08, "loss": 0.0545, "step": 10615 }, { "epoch": 2.9, "grad_norm": 1.6557424568776744, "learning_rate": 3.0184712268794824e-08, "loss": 0.0435, "step": 10616 }, { "epoch": 2.9, "grad_norm": 1.3624022671095553, "learning_rate": 3.002324285220515e-08, "loss": 0.0396, "step": 10617 }, { "epoch": 2.9, "grad_norm": 1.3157527987512172, "learning_rate": 2.9862205174510104e-08, "loss": 0.0337, "step": 10618 }, { "epoch": 2.9, "grad_norm": 1.1927533065961073, "learning_rate": 2.970159924969962e-08, "loss": 0.0341, "step": 10619 }, { "epoch": 2.9, "grad_norm": 1.4321051428770024, "learning_rate": 2.9541425091724195e-08, "loss": 0.043, "step": 10620 }, { "epoch": 2.9, "grad_norm": 1.2734553073270665, "learning_rate": 2.9381682714499372e-08, "loss": 0.0373, "step": 10621 }, { "epoch": 2.9, "grad_norm": 1.2211377567700816, "learning_rate": 2.9222372131901266e-08, "loss": 0.0318, "step": 10622 }, { "epoch": 2.9, "grad_norm": 1.2595193009598002, "learning_rate": 2.9063493357769368e-08, "loss": 0.0406, "step": 10623 }, { "epoch": 2.9, "grad_norm": 1.669762379640789, "learning_rate": 2.8905046405905412e-08, "loss": 0.0523, "step": 10624 }, { "epoch": 2.9, "grad_norm": 1.5509933566261285, "learning_rate": 2.8747031290072834e-08, "loss": 0.0462, "step": 10625 }, { "epoch": 2.9, "grad_norm": 1.6493909013553099, "learning_rate": 2.858944802399899e-08, "loss": 0.0444, "step": 10626 }, { "epoch": 2.9, "grad_norm": 1.6550358463538808, "learning_rate": 2.8432296621373478e-08, "loss": 0.0588, "step": 10627 }, { "epoch": 2.9, "grad_norm": 1.4231544392548225, "learning_rate": 2.8275577095846495e-08, "loss": 0.0437, "step": 10628 }, { "epoch": 2.9, "grad_norm": 1.2458509630893728, "learning_rate": 2.8119289461033817e-08, "loss": 0.0399, "step": 10629 }, { "epoch": 2.9, "grad_norm": 1.7910770037532278, "learning_rate": 2.796343373051069e-08, "loss": 0.0489, "step": 10630 }, { "epoch": 2.9, "grad_norm": 1.6393441870289993, "learning_rate": 2.7808009917817402e-08, "loss": 0.0455, "step": 10631 }, { "epoch": 2.9, "grad_norm": 1.5643553821934986, "learning_rate": 2.765301803645426e-08, "loss": 0.0444, "step": 10632 }, { "epoch": 2.9, "grad_norm": 1.4419643872100028, "learning_rate": 2.7498458099886605e-08, "loss": 0.0367, "step": 10633 }, { "epoch": 2.9, "grad_norm": 1.5201198650682082, "learning_rate": 2.7344330121539807e-08, "loss": 0.0487, "step": 10634 }, { "epoch": 2.9, "grad_norm": 1.4085618178721064, "learning_rate": 2.7190634114803717e-08, "loss": 0.0446, "step": 10635 }, { "epoch": 2.9, "grad_norm": 1.6583413173150827, "learning_rate": 2.7037370093029868e-08, "loss": 0.0553, "step": 10636 }, { "epoch": 2.9, "grad_norm": 1.5811792046777655, "learning_rate": 2.6884538069531506e-08, "loss": 0.0457, "step": 10637 }, { "epoch": 2.9, "grad_norm": 1.5155842228125431, "learning_rate": 2.6732138057585232e-08, "loss": 0.0424, "step": 10638 }, { "epoch": 2.9, "grad_norm": 1.5936853011347158, "learning_rate": 2.6580170070430457e-08, "loss": 0.0449, "step": 10639 }, { "epoch": 2.9, "grad_norm": 1.1087122341446085, "learning_rate": 2.6428634121267726e-08, "loss": 0.0339, "step": 10640 }, { "epoch": 2.9, "grad_norm": 1.2050498543844859, "learning_rate": 2.627753022326207e-08, "loss": 0.0379, "step": 10641 }, { "epoch": 2.91, "grad_norm": 1.5615352432417156, "learning_rate": 2.61268583895391e-08, "loss": 0.0464, "step": 10642 }, { "epoch": 2.91, "grad_norm": 1.727585114779161, "learning_rate": 2.5976618633187233e-08, "loss": 0.0468, "step": 10643 }, { "epoch": 2.91, "grad_norm": 1.4229766279107658, "learning_rate": 2.58268109672577e-08, "loss": 0.0411, "step": 10644 }, { "epoch": 2.91, "grad_norm": 1.468224347708062, "learning_rate": 2.5677435404765082e-08, "loss": 0.0425, "step": 10645 }, { "epoch": 2.91, "grad_norm": 1.4893922463100215, "learning_rate": 2.5528491958684565e-08, "loss": 0.044, "step": 10646 }, { "epoch": 2.91, "grad_norm": 1.509416337041036, "learning_rate": 2.5379980641955792e-08, "loss": 0.0446, "step": 10647 }, { "epoch": 2.91, "grad_norm": 1.3348902660693356, "learning_rate": 2.5231901467479004e-08, "loss": 0.0413, "step": 10648 }, { "epoch": 2.91, "grad_norm": 1.4358113102302523, "learning_rate": 2.5084254448117794e-08, "loss": 0.0425, "step": 10649 }, { "epoch": 2.91, "grad_norm": 1.4168325170032015, "learning_rate": 2.4937039596698576e-08, "loss": 0.0411, "step": 10650 }, { "epoch": 2.91, "grad_norm": 1.4003557393614532, "learning_rate": 2.4790256926010003e-08, "loss": 0.04, "step": 10651 }, { "epoch": 2.91, "grad_norm": 1.9397362452968625, "learning_rate": 2.4643906448801878e-08, "loss": 0.0465, "step": 10652 }, { "epoch": 2.91, "grad_norm": 1.3819049866061375, "learning_rate": 2.4497988177789034e-08, "loss": 0.041, "step": 10653 }, { "epoch": 2.91, "grad_norm": 1.5329520554347635, "learning_rate": 2.4352502125646882e-08, "loss": 0.0388, "step": 10654 }, { "epoch": 2.91, "grad_norm": 1.7876571659896303, "learning_rate": 2.4207448305012538e-08, "loss": 0.0534, "step": 10655 }, { "epoch": 2.91, "grad_norm": 1.768251136088922, "learning_rate": 2.406282672848814e-08, "loss": 0.054, "step": 10656 }, { "epoch": 2.91, "grad_norm": 1.7662025099317198, "learning_rate": 2.3918637408636425e-08, "loss": 0.0535, "step": 10657 }, { "epoch": 2.91, "grad_norm": 1.7250325793466337, "learning_rate": 2.3774880357982922e-08, "loss": 0.0407, "step": 10658 }, { "epoch": 2.91, "grad_norm": 1.597163184804839, "learning_rate": 2.363155558901542e-08, "loss": 0.0446, "step": 10659 }, { "epoch": 2.91, "grad_norm": 1.4798246531559354, "learning_rate": 2.3488663114185628e-08, "loss": 0.0497, "step": 10660 }, { "epoch": 2.91, "grad_norm": 1.5893666045051908, "learning_rate": 2.3346202945905284e-08, "loss": 0.0423, "step": 10661 }, { "epoch": 2.91, "grad_norm": 1.4695499344695997, "learning_rate": 2.320417509655004e-08, "loss": 0.044, "step": 10662 }, { "epoch": 2.91, "grad_norm": 1.4960748319277428, "learning_rate": 2.3062579578458365e-08, "loss": 0.0451, "step": 10663 }, { "epoch": 2.91, "grad_norm": 1.523977657211241, "learning_rate": 2.2921416403929863e-08, "loss": 0.0404, "step": 10664 }, { "epoch": 2.91, "grad_norm": 1.7230267713806449, "learning_rate": 2.2780685585227504e-08, "loss": 0.0577, "step": 10665 }, { "epoch": 2.91, "grad_norm": 1.617890761392945, "learning_rate": 2.264038713457706e-08, "loss": 0.0437, "step": 10666 }, { "epoch": 2.91, "grad_norm": 1.3440721676419964, "learning_rate": 2.250052106416545e-08, "loss": 0.0393, "step": 10667 }, { "epoch": 2.91, "grad_norm": 1.6007104281582487, "learning_rate": 2.2361087386142954e-08, "loss": 0.0455, "step": 10668 }, { "epoch": 2.91, "grad_norm": 1.4334373708446035, "learning_rate": 2.2222086112622665e-08, "loss": 0.0467, "step": 10669 }, { "epoch": 2.91, "grad_norm": 1.474125347782493, "learning_rate": 2.208351725567881e-08, "loss": 0.0395, "step": 10670 }, { "epoch": 2.91, "grad_norm": 1.4044074695879036, "learning_rate": 2.1945380827348985e-08, "loss": 0.0396, "step": 10671 }, { "epoch": 2.91, "grad_norm": 1.755600460382403, "learning_rate": 2.180767683963303e-08, "loss": 0.0428, "step": 10672 }, { "epoch": 2.91, "grad_norm": 1.5821853299495003, "learning_rate": 2.1670405304493047e-08, "loss": 0.0442, "step": 10673 }, { "epoch": 2.91, "grad_norm": 1.4700043696136404, "learning_rate": 2.1533566233853942e-08, "loss": 0.0417, "step": 10674 }, { "epoch": 2.91, "grad_norm": 1.7028030461164436, "learning_rate": 2.139715963960287e-08, "loss": 0.0435, "step": 10675 }, { "epoch": 2.91, "grad_norm": 1.7007027839298752, "learning_rate": 2.1261185533589246e-08, "loss": 0.0393, "step": 10676 }, { "epoch": 2.91, "grad_norm": 1.3895439241406904, "learning_rate": 2.1125643927625838e-08, "loss": 0.0407, "step": 10677 }, { "epoch": 2.92, "grad_norm": 1.3811205442440373, "learning_rate": 2.0990534833485455e-08, "loss": 0.0412, "step": 10678 }, { "epoch": 2.92, "grad_norm": 1.448575969141517, "learning_rate": 2.085585826290648e-08, "loss": 0.0383, "step": 10679 }, { "epoch": 2.92, "grad_norm": 1.4932019895325057, "learning_rate": 2.0721614227587338e-08, "loss": 0.0428, "step": 10680 }, { "epoch": 2.92, "grad_norm": 1.3679061540017559, "learning_rate": 2.058780273918981e-08, "loss": 0.041, "step": 10681 }, { "epoch": 2.92, "grad_norm": 1.5050632579938947, "learning_rate": 2.0454423809338487e-08, "loss": 0.0431, "step": 10682 }, { "epoch": 2.92, "grad_norm": 1.5588729395985919, "learning_rate": 2.0321477449619098e-08, "loss": 0.0385, "step": 10683 }, { "epoch": 2.92, "grad_norm": 1.3685898383044695, "learning_rate": 2.0188963671581852e-08, "loss": 0.0401, "step": 10684 }, { "epoch": 2.92, "grad_norm": 1.7847647381851344, "learning_rate": 2.0056882486736982e-08, "loss": 0.0506, "step": 10685 }, { "epoch": 2.92, "grad_norm": 1.294559231599737, "learning_rate": 1.992523390655865e-08, "loss": 0.0362, "step": 10686 }, { "epoch": 2.92, "grad_norm": 1.1469227324379572, "learning_rate": 1.9794017942483258e-08, "loss": 0.0308, "step": 10687 }, { "epoch": 2.92, "grad_norm": 1.3411783014450693, "learning_rate": 1.9663234605909465e-08, "loss": 0.0419, "step": 10688 }, { "epoch": 2.92, "grad_norm": 1.63278633193149, "learning_rate": 1.9532883908198185e-08, "loss": 0.0426, "step": 10689 }, { "epoch": 2.92, "grad_norm": 1.5424335388261954, "learning_rate": 1.9402965860672584e-08, "loss": 0.0417, "step": 10690 }, { "epoch": 2.92, "grad_norm": 1.5409700082670148, "learning_rate": 1.927348047461919e-08, "loss": 0.0537, "step": 10691 }, { "epoch": 2.92, "grad_norm": 1.4289587527834118, "learning_rate": 1.914442776128622e-08, "loss": 0.0409, "step": 10692 }, { "epoch": 2.92, "grad_norm": 1.2158352555373946, "learning_rate": 1.9015807731884163e-08, "loss": 0.0304, "step": 10693 }, { "epoch": 2.92, "grad_norm": 1.348840203398288, "learning_rate": 1.8887620397586292e-08, "loss": 0.0408, "step": 10694 }, { "epoch": 2.92, "grad_norm": 1.6089578505695998, "learning_rate": 1.8759865769528153e-08, "loss": 0.0522, "step": 10695 }, { "epoch": 2.92, "grad_norm": 1.4076272186639576, "learning_rate": 1.8632543858807528e-08, "loss": 0.0411, "step": 10696 }, { "epoch": 2.92, "grad_norm": 1.3349742004464702, "learning_rate": 1.850565467648502e-08, "loss": 0.0431, "step": 10697 }, { "epoch": 2.92, "grad_norm": 1.4985578050175992, "learning_rate": 1.8379198233583472e-08, "loss": 0.0474, "step": 10698 }, { "epoch": 2.92, "grad_norm": 1.5666280567917392, "learning_rate": 1.8253174541087437e-08, "loss": 0.0491, "step": 10699 }, { "epoch": 2.92, "grad_norm": 1.4397053228698713, "learning_rate": 1.8127583609945376e-08, "loss": 0.0394, "step": 10700 }, { "epoch": 2.92, "grad_norm": 1.8288756968783817, "learning_rate": 1.8002425451067452e-08, "loss": 0.0426, "step": 10701 }, { "epoch": 2.92, "grad_norm": 1.3427518951434703, "learning_rate": 1.7877700075324966e-08, "loss": 0.0351, "step": 10702 }, { "epoch": 2.92, "grad_norm": 1.5922547653910373, "learning_rate": 1.7753407493553698e-08, "loss": 0.0526, "step": 10703 }, { "epoch": 2.92, "grad_norm": 1.5562393843258577, "learning_rate": 1.762954771655001e-08, "loss": 0.0421, "step": 10704 }, { "epoch": 2.92, "grad_norm": 1.365655872556554, "learning_rate": 1.750612075507474e-08, "loss": 0.0393, "step": 10705 }, { "epoch": 2.92, "grad_norm": 1.46759862072386, "learning_rate": 1.7383126619848756e-08, "loss": 0.0453, "step": 10706 }, { "epoch": 2.92, "grad_norm": 1.345111268280631, "learning_rate": 1.7260565321556843e-08, "loss": 0.0413, "step": 10707 }, { "epoch": 2.92, "grad_norm": 1.4855784706720545, "learning_rate": 1.7138436870846598e-08, "loss": 0.0429, "step": 10708 }, { "epoch": 2.92, "grad_norm": 1.506809403891813, "learning_rate": 1.70167412783262e-08, "loss": 0.0445, "step": 10709 }, { "epoch": 2.92, "grad_norm": 1.315210769585116, "learning_rate": 1.689547855456719e-08, "loss": 0.0389, "step": 10710 }, { "epoch": 2.92, "grad_norm": 1.6047562802574138, "learning_rate": 1.677464871010448e-08, "loss": 0.0457, "step": 10711 }, { "epoch": 2.92, "grad_norm": 1.353384694078905, "learning_rate": 1.6654251755434115e-08, "loss": 0.0372, "step": 10712 }, { "epoch": 2.92, "grad_norm": 1.5232199605381727, "learning_rate": 1.653428770101495e-08, "loss": 0.0411, "step": 10713 }, { "epoch": 2.92, "grad_norm": 1.2670046797244183, "learning_rate": 1.6414756557267542e-08, "loss": 0.0337, "step": 10714 }, { "epoch": 2.93, "grad_norm": 1.5716873497739476, "learning_rate": 1.629565833457636e-08, "loss": 0.0495, "step": 10715 }, { "epoch": 2.93, "grad_norm": 1.5697445399666754, "learning_rate": 1.617699304328757e-08, "loss": 0.0465, "step": 10716 }, { "epoch": 2.93, "grad_norm": 1.3800726614807521, "learning_rate": 1.6058760693708487e-08, "loss": 0.0409, "step": 10717 }, { "epoch": 2.93, "grad_norm": 1.34750262465537, "learning_rate": 1.5940961296110335e-08, "loss": 0.0448, "step": 10718 }, { "epoch": 2.93, "grad_norm": 1.4988958278377509, "learning_rate": 1.5823594860726598e-08, "loss": 0.0471, "step": 10719 }, { "epoch": 2.93, "grad_norm": 1.3945111778973382, "learning_rate": 1.5706661397753008e-08, "loss": 0.0412, "step": 10720 }, { "epoch": 2.93, "grad_norm": 1.5667550513175472, "learning_rate": 1.5590160917346443e-08, "loss": 0.0487, "step": 10721 }, { "epoch": 2.93, "grad_norm": 1.7088042943121222, "learning_rate": 1.5474093429628246e-08, "loss": 0.0462, "step": 10722 }, { "epoch": 2.93, "grad_norm": 1.530968566214379, "learning_rate": 1.5358458944680356e-08, "loss": 0.0444, "step": 10723 }, { "epoch": 2.93, "grad_norm": 1.620501369165027, "learning_rate": 1.5243257472549178e-08, "loss": 0.0554, "step": 10724 }, { "epoch": 2.93, "grad_norm": 1.3434823417206685, "learning_rate": 1.51284890232406e-08, "loss": 0.0392, "step": 10725 }, { "epoch": 2.93, "grad_norm": 1.6184885690484216, "learning_rate": 1.5014153606725535e-08, "loss": 0.0531, "step": 10726 }, { "epoch": 2.93, "grad_norm": 1.6383499239853183, "learning_rate": 1.4900251232935482e-08, "loss": 0.0468, "step": 10727 }, { "epoch": 2.93, "grad_norm": 1.420321950898719, "learning_rate": 1.4786781911765857e-08, "loss": 0.0417, "step": 10728 }, { "epoch": 2.93, "grad_norm": 1.4099910906282243, "learning_rate": 1.4673745653073223e-08, "loss": 0.0403, "step": 10729 }, { "epoch": 2.93, "grad_norm": 1.5804191938807965, "learning_rate": 1.4561142466677502e-08, "loss": 0.0487, "step": 10730 }, { "epoch": 2.93, "grad_norm": 1.2233289338773374, "learning_rate": 1.4448972362359759e-08, "loss": 0.0344, "step": 10731 }, { "epoch": 2.93, "grad_norm": 1.4893824474879294, "learning_rate": 1.433723534986442e-08, "loss": 0.0418, "step": 10732 }, { "epoch": 2.93, "grad_norm": 1.300561263564656, "learning_rate": 1.4225931438897612e-08, "loss": 0.0348, "step": 10733 }, { "epoch": 2.93, "grad_norm": 1.3828893883781943, "learning_rate": 1.411506063912882e-08, "loss": 0.0426, "step": 10734 }, { "epoch": 2.93, "grad_norm": 1.7011372756451106, "learning_rate": 1.4004622960189229e-08, "loss": 0.0489, "step": 10735 }, { "epoch": 2.93, "grad_norm": 1.4554272950162535, "learning_rate": 1.3894618411672278e-08, "loss": 0.0427, "step": 10736 }, { "epoch": 2.93, "grad_norm": 1.419167376558054, "learning_rate": 1.3785047003134211e-08, "loss": 0.0455, "step": 10737 }, { "epoch": 2.93, "grad_norm": 1.4955770762044773, "learning_rate": 1.3675908744093524e-08, "loss": 0.0418, "step": 10738 }, { "epoch": 2.93, "grad_norm": 1.4166534640261086, "learning_rate": 1.3567203644030414e-08, "loss": 0.0371, "step": 10739 }, { "epoch": 2.93, "grad_norm": 1.8107884742851748, "learning_rate": 1.3458931712388434e-08, "loss": 0.0516, "step": 10740 }, { "epoch": 2.93, "grad_norm": 1.2615096438842115, "learning_rate": 1.3351092958573397e-08, "loss": 0.0343, "step": 10741 }, { "epoch": 2.93, "grad_norm": 1.3032461606273722, "learning_rate": 1.3243687391952809e-08, "loss": 0.0427, "step": 10742 }, { "epoch": 2.93, "grad_norm": 1.303561856417339, "learning_rate": 1.3136715021856983e-08, "loss": 0.0374, "step": 10743 }, { "epoch": 2.93, "grad_norm": 1.6545778761700545, "learning_rate": 1.3030175857578487e-08, "loss": 0.0437, "step": 10744 }, { "epoch": 2.93, "grad_norm": 1.4699715611802815, "learning_rate": 1.292406990837214e-08, "loss": 0.0408, "step": 10745 }, { "epoch": 2.93, "grad_norm": 1.5282628525093926, "learning_rate": 1.2818397183456122e-08, "loss": 0.045, "step": 10746 }, { "epoch": 2.93, "grad_norm": 1.387864496928228, "learning_rate": 1.2713157692008648e-08, "loss": 0.0349, "step": 10747 }, { "epoch": 2.93, "grad_norm": 1.4244961667150988, "learning_rate": 1.2608351443173516e-08, "loss": 0.0405, "step": 10748 }, { "epoch": 2.93, "grad_norm": 1.4531064337285766, "learning_rate": 1.2503978446054555e-08, "loss": 0.0427, "step": 10749 }, { "epoch": 2.93, "grad_norm": 1.385745563514159, "learning_rate": 1.2400038709717843e-08, "loss": 0.045, "step": 10750 }, { "epoch": 2.94, "grad_norm": 1.8587841268297296, "learning_rate": 1.2296532243193382e-08, "loss": 0.0558, "step": 10751 }, { "epoch": 2.94, "grad_norm": 1.334795964776019, "learning_rate": 1.2193459055472867e-08, "loss": 0.0359, "step": 10752 }, { "epoch": 2.94, "grad_norm": 1.6644759635299555, "learning_rate": 1.2090819155509137e-08, "loss": 0.0409, "step": 10753 }, { "epoch": 2.94, "grad_norm": 1.5957644796857635, "learning_rate": 1.1988612552219503e-08, "loss": 0.041, "step": 10754 }, { "epoch": 2.94, "grad_norm": 1.4370771175671393, "learning_rate": 1.1886839254482419e-08, "loss": 0.0434, "step": 10755 }, { "epoch": 2.94, "grad_norm": 1.7611440443201483, "learning_rate": 1.178549927113859e-08, "loss": 0.0438, "step": 10756 }, { "epoch": 2.94, "grad_norm": 1.5167430800846882, "learning_rate": 1.168459261099153e-08, "loss": 0.0465, "step": 10757 }, { "epoch": 2.94, "grad_norm": 1.544796971352805, "learning_rate": 1.158411928280645e-08, "loss": 0.0533, "step": 10758 }, { "epoch": 2.94, "grad_norm": 1.274658353266541, "learning_rate": 1.1484079295311923e-08, "loss": 0.0417, "step": 10759 }, { "epoch": 2.94, "grad_norm": 1.4947542967215883, "learning_rate": 1.1384472657198775e-08, "loss": 0.0389, "step": 10760 }, { "epoch": 2.94, "grad_norm": 1.373873197759611, "learning_rate": 1.1285299377118974e-08, "loss": 0.0431, "step": 10761 }, { "epoch": 2.94, "grad_norm": 1.6425062783641782, "learning_rate": 1.1186559463687851e-08, "loss": 0.0457, "step": 10762 }, { "epoch": 2.94, "grad_norm": 1.4095498211526, "learning_rate": 1.1088252925482989e-08, "loss": 0.0412, "step": 10763 }, { "epoch": 2.94, "grad_norm": 1.349540029139377, "learning_rate": 1.0990379771044223e-08, "loss": 0.0434, "step": 10764 }, { "epoch": 2.94, "grad_norm": 1.627067391370231, "learning_rate": 1.0892940008873642e-08, "loss": 0.0494, "step": 10765 }, { "epoch": 2.94, "grad_norm": 1.3155204605664943, "learning_rate": 1.0795933647436141e-08, "loss": 0.0356, "step": 10766 }, { "epoch": 2.94, "grad_norm": 1.3276536092192552, "learning_rate": 1.0699360695158311e-08, "loss": 0.0416, "step": 10767 }, { "epoch": 2.94, "grad_norm": 1.6432370559531282, "learning_rate": 1.0603221160429e-08, "loss": 0.0465, "step": 10768 }, { "epoch": 2.94, "grad_norm": 1.629532341534546, "learning_rate": 1.0507515051600415e-08, "loss": 0.0387, "step": 10769 }, { "epoch": 2.94, "grad_norm": 1.6497503371717654, "learning_rate": 1.0412242376985903e-08, "loss": 0.0538, "step": 10770 }, { "epoch": 2.94, "grad_norm": 1.3868137418797266, "learning_rate": 1.0317403144862182e-08, "loss": 0.0436, "step": 10771 }, { "epoch": 2.94, "grad_norm": 1.2919589733922905, "learning_rate": 1.0222997363468213e-08, "loss": 0.0369, "step": 10772 }, { "epoch": 2.94, "grad_norm": 1.4929805937932075, "learning_rate": 1.0129025041004659e-08, "loss": 0.0405, "step": 10773 }, { "epoch": 2.94, "grad_norm": 1.3469095328420277, "learning_rate": 1.003548618563388e-08, "loss": 0.0428, "step": 10774 }, { "epoch": 2.94, "grad_norm": 1.4184959989244899, "learning_rate": 9.942380805483266e-09, "loss": 0.0428, "step": 10775 }, { "epoch": 2.94, "grad_norm": 1.5614177386903487, "learning_rate": 9.849708908639677e-09, "loss": 0.0479, "step": 10776 }, { "epoch": 2.94, "grad_norm": 1.5611753328101294, "learning_rate": 9.757470503153344e-09, "loss": 0.045, "step": 10777 }, { "epoch": 2.94, "grad_norm": 1.4504061320816564, "learning_rate": 9.6656655970373e-09, "loss": 0.0493, "step": 10778 }, { "epoch": 2.94, "grad_norm": 1.3698716107193216, "learning_rate": 9.574294198267387e-09, "loss": 0.0387, "step": 10779 }, { "epoch": 2.94, "grad_norm": 1.4573844024879703, "learning_rate": 9.48335631477948e-09, "loss": 0.0395, "step": 10780 }, { "epoch": 2.94, "grad_norm": 1.3734655329160572, "learning_rate": 9.39285195447448e-09, "loss": 0.0363, "step": 10781 }, { "epoch": 2.94, "grad_norm": 1.3251746429178852, "learning_rate": 9.302781125213878e-09, "loss": 0.0383, "step": 10782 }, { "epoch": 2.94, "grad_norm": 1.5693584948225776, "learning_rate": 9.213143834822524e-09, "loss": 0.0546, "step": 10783 }, { "epoch": 2.94, "grad_norm": 1.6551295003690372, "learning_rate": 9.123940091086414e-09, "loss": 0.047, "step": 10784 }, { "epoch": 2.94, "grad_norm": 1.5290974854365542, "learning_rate": 9.035169901754902e-09, "loss": 0.0353, "step": 10785 }, { "epoch": 2.94, "grad_norm": 1.62696547558024, "learning_rate": 8.946833274540157e-09, "loss": 0.0466, "step": 10786 }, { "epoch": 2.94, "grad_norm": 1.3240798031847774, "learning_rate": 8.858930217114925e-09, "loss": 0.0354, "step": 10787 }, { "epoch": 2.95, "grad_norm": 1.267955964357376, "learning_rate": 8.771460737115878e-09, "loss": 0.0373, "step": 10788 }, { "epoch": 2.95, "grad_norm": 1.7453964815444112, "learning_rate": 8.684424842140825e-09, "loss": 0.0465, "step": 10789 }, { "epoch": 2.95, "grad_norm": 1.307256082800548, "learning_rate": 8.5978225397515e-09, "loss": 0.0359, "step": 10790 }, { "epoch": 2.95, "grad_norm": 1.4961167991739515, "learning_rate": 8.511653837470212e-09, "loss": 0.0486, "step": 10791 }, { "epoch": 2.95, "grad_norm": 1.6846583699787798, "learning_rate": 8.425918742782646e-09, "loss": 0.051, "step": 10792 }, { "epoch": 2.95, "grad_norm": 1.414505837094659, "learning_rate": 8.340617263136175e-09, "loss": 0.0405, "step": 10793 }, { "epoch": 2.95, "grad_norm": 1.6030134358880264, "learning_rate": 8.255749405941538e-09, "loss": 0.0434, "step": 10794 }, { "epoch": 2.95, "grad_norm": 1.602428884836434, "learning_rate": 8.171315178570616e-09, "loss": 0.0419, "step": 10795 }, { "epoch": 2.95, "grad_norm": 1.842711205773103, "learning_rate": 8.087314588358653e-09, "loss": 0.0483, "step": 10796 }, { "epoch": 2.95, "grad_norm": 1.4852357560728653, "learning_rate": 8.003747642602588e-09, "loss": 0.0444, "step": 10797 }, { "epoch": 2.95, "grad_norm": 1.4311937899110352, "learning_rate": 7.920614348561618e-09, "loss": 0.0429, "step": 10798 }, { "epoch": 2.95, "grad_norm": 1.441337972264241, "learning_rate": 7.837914713457184e-09, "loss": 0.0464, "step": 10799 }, { "epoch": 2.95, "grad_norm": 1.4898772018645408, "learning_rate": 7.755648744474097e-09, "loss": 0.043, "step": 10800 }, { "epoch": 2.95, "grad_norm": 1.4585072850147554, "learning_rate": 7.67381644875831e-09, "loss": 0.0488, "step": 10801 }, { "epoch": 2.95, "grad_norm": 1.6279724664834248, "learning_rate": 7.59241783341913e-09, "loss": 0.0396, "step": 10802 }, { "epoch": 2.95, "grad_norm": 1.498728209324664, "learning_rate": 7.511452905526462e-09, "loss": 0.0464, "step": 10803 }, { "epoch": 2.95, "grad_norm": 1.3981991639232685, "learning_rate": 7.430921672114677e-09, "loss": 0.0381, "step": 10804 }, { "epoch": 2.95, "grad_norm": 1.464355203458042, "learning_rate": 7.350824140178736e-09, "loss": 0.0435, "step": 10805 }, { "epoch": 2.95, "grad_norm": 1.592620134381601, "learning_rate": 7.271160316677517e-09, "loss": 0.0501, "step": 10806 }, { "epoch": 2.95, "grad_norm": 1.3458280643468141, "learning_rate": 7.191930208530485e-09, "loss": 0.0408, "step": 10807 }, { "epoch": 2.95, "grad_norm": 1.6062566831242322, "learning_rate": 7.113133822621021e-09, "loss": 0.0504, "step": 10808 }, { "epoch": 2.95, "grad_norm": 1.4190359223821862, "learning_rate": 7.034771165794208e-09, "loss": 0.0394, "step": 10809 }, { "epoch": 2.95, "grad_norm": 1.5126871254060108, "learning_rate": 6.956842244856266e-09, "loss": 0.0447, "step": 10810 }, { "epoch": 2.95, "grad_norm": 1.322002025450512, "learning_rate": 6.87934706657789e-09, "loss": 0.0392, "step": 10811 }, { "epoch": 2.95, "grad_norm": 1.6501446334555008, "learning_rate": 6.802285637690364e-09, "loss": 0.0524, "step": 10812 }, { "epoch": 2.95, "grad_norm": 1.573952576165649, "learning_rate": 6.725657964888887e-09, "loss": 0.0546, "step": 10813 }, { "epoch": 2.95, "grad_norm": 1.5134178138175283, "learning_rate": 6.649464054829246e-09, "loss": 0.0306, "step": 10814 }, { "epoch": 2.95, "grad_norm": 1.243906246997328, "learning_rate": 6.573703914130591e-09, "loss": 0.0346, "step": 10815 }, { "epoch": 2.95, "grad_norm": 1.7778733964028812, "learning_rate": 6.498377549374324e-09, "loss": 0.0459, "step": 10816 }, { "epoch": 2.95, "grad_norm": 1.5185647835941254, "learning_rate": 6.423484967103544e-09, "loss": 0.0458, "step": 10817 }, { "epoch": 2.95, "grad_norm": 1.4506850808515908, "learning_rate": 6.349026173824713e-09, "loss": 0.0417, "step": 10818 }, { "epoch": 2.95, "grad_norm": 1.316557314909194, "learning_rate": 6.2750011760054355e-09, "loss": 0.0377, "step": 10819 }, { "epoch": 2.95, "grad_norm": 1.613417561484535, "learning_rate": 6.201409980076678e-09, "loss": 0.0508, "step": 10820 }, { "epoch": 2.95, "grad_norm": 1.2832144289214522, "learning_rate": 6.128252592431105e-09, "loss": 0.035, "step": 10821 }, { "epoch": 2.95, "grad_norm": 1.7144320753603994, "learning_rate": 6.055529019423634e-09, "loss": 0.0478, "step": 10822 }, { "epoch": 2.95, "grad_norm": 1.6079462766544612, "learning_rate": 5.983239267371987e-09, "loss": 0.0448, "step": 10823 }, { "epoch": 2.95, "grad_norm": 1.4592229649283994, "learning_rate": 5.911383342556143e-09, "loss": 0.0403, "step": 10824 }, { "epoch": 2.96, "grad_norm": 1.5591005189827973, "learning_rate": 5.839961251217774e-09, "loss": 0.0422, "step": 10825 }, { "epoch": 2.96, "grad_norm": 1.4264799517020308, "learning_rate": 5.768972999561362e-09, "loss": 0.0385, "step": 10826 }, { "epoch": 2.96, "grad_norm": 1.7476446332307087, "learning_rate": 5.698418593754196e-09, "loss": 0.0473, "step": 10827 }, { "epoch": 2.96, "grad_norm": 1.2939486277703305, "learning_rate": 5.628298039924152e-09, "loss": 0.0426, "step": 10828 }, { "epoch": 2.96, "grad_norm": 1.5195819848685794, "learning_rate": 5.558611344163023e-09, "loss": 0.0519, "step": 10829 }, { "epoch": 2.96, "grad_norm": 1.4159970819293244, "learning_rate": 5.489358512524856e-09, "loss": 0.0337, "step": 10830 }, { "epoch": 2.96, "grad_norm": 1.2766911446969194, "learning_rate": 5.4205395510253944e-09, "loss": 0.0362, "step": 10831 }, { "epoch": 2.96, "grad_norm": 1.3779434282763896, "learning_rate": 5.352154465643189e-09, "loss": 0.0377, "step": 10832 }, { "epoch": 2.96, "grad_norm": 1.296236610449945, "learning_rate": 5.284203262318488e-09, "loss": 0.0362, "step": 10833 }, { "epoch": 2.96, "grad_norm": 1.3996965138864614, "learning_rate": 5.216685946953237e-09, "loss": 0.0393, "step": 10834 }, { "epoch": 2.96, "grad_norm": 1.4030681677837804, "learning_rate": 5.14960252541441e-09, "loss": 0.0344, "step": 10835 }, { "epoch": 2.96, "grad_norm": 1.5777290966746682, "learning_rate": 5.082953003528457e-09, "loss": 0.0379, "step": 10836 }, { "epoch": 2.96, "grad_norm": 1.7139913975929897, "learning_rate": 5.016737387085191e-09, "loss": 0.0564, "step": 10837 }, { "epoch": 2.96, "grad_norm": 1.4141745498864458, "learning_rate": 4.950955681837233e-09, "loss": 0.0455, "step": 10838 }, { "epoch": 2.96, "grad_norm": 1.4399323882780701, "learning_rate": 4.885607893498345e-09, "loss": 0.0422, "step": 10839 }, { "epoch": 2.96, "grad_norm": 1.4369208235777924, "learning_rate": 4.8206940277456534e-09, "loss": 0.0387, "step": 10840 }, { "epoch": 2.96, "grad_norm": 1.3570717895802908, "learning_rate": 4.7562140902185364e-09, "loss": 0.0401, "step": 10841 }, { "epoch": 2.96, "grad_norm": 1.7270394626888796, "learning_rate": 4.6921680865169574e-09, "loss": 0.0441, "step": 10842 }, { "epoch": 2.96, "grad_norm": 1.5126939251320637, "learning_rate": 4.6285560222064655e-09, "loss": 0.0413, "step": 10843 }, { "epoch": 2.96, "grad_norm": 1.6160771446346398, "learning_rate": 4.565377902811529e-09, "loss": 0.0427, "step": 10844 }, { "epoch": 2.96, "grad_norm": 1.5580995191338787, "learning_rate": 4.502633733821093e-09, "loss": 0.0414, "step": 10845 }, { "epoch": 2.96, "grad_norm": 1.5602462455976787, "learning_rate": 4.440323520685241e-09, "loss": 0.0524, "step": 10846 }, { "epoch": 2.96, "grad_norm": 1.8680690286145887, "learning_rate": 4.378447268817421e-09, "loss": 0.0518, "step": 10847 }, { "epoch": 2.96, "grad_norm": 1.2513158544551304, "learning_rate": 4.317004983592221e-09, "loss": 0.035, "step": 10848 }, { "epoch": 2.96, "grad_norm": 1.2011321173739833, "learning_rate": 4.25599667034704e-09, "loss": 0.0351, "step": 10849 }, { "epoch": 2.96, "grad_norm": 1.5202535962272616, "learning_rate": 4.195422334382638e-09, "loss": 0.0399, "step": 10850 }, { "epoch": 2.96, "grad_norm": 1.5785499276976593, "learning_rate": 4.1352819809598045e-09, "loss": 0.0471, "step": 10851 }, { "epoch": 2.96, "grad_norm": 1.4711722796643292, "learning_rate": 4.075575615303807e-09, "loss": 0.0446, "step": 10852 }, { "epoch": 2.96, "grad_norm": 1.3915687685053666, "learning_rate": 4.016303242600495e-09, "loss": 0.0421, "step": 10853 }, { "epoch": 2.96, "grad_norm": 1.5622287834229704, "learning_rate": 3.957464867999638e-09, "loss": 0.0438, "step": 10854 }, { "epoch": 2.96, "grad_norm": 1.5061642361001593, "learning_rate": 3.8990604966121504e-09, "loss": 0.0461, "step": 10855 }, { "epoch": 2.96, "grad_norm": 1.843311463282505, "learning_rate": 3.841090133511749e-09, "loss": 0.0555, "step": 10856 }, { "epoch": 2.96, "grad_norm": 1.4738266521322858, "learning_rate": 3.783553783733851e-09, "loss": 0.0458, "step": 10857 }, { "epoch": 2.96, "grad_norm": 1.654708273593146, "learning_rate": 3.72645145227668e-09, "loss": 0.0481, "step": 10858 }, { "epoch": 2.96, "grad_norm": 1.2932316918101079, "learning_rate": 3.6697831441007136e-09, "loss": 0.0349, "step": 10859 }, { "epoch": 2.96, "grad_norm": 1.243118157193055, "learning_rate": 3.613548864129235e-09, "loss": 0.0388, "step": 10860 }, { "epoch": 2.97, "grad_norm": 1.262885487662697, "learning_rate": 3.5577486172466703e-09, "loss": 0.0328, "step": 10861 }, { "epoch": 2.97, "grad_norm": 1.6348210138976664, "learning_rate": 3.5023824083008083e-09, "loss": 0.0399, "step": 10862 }, { "epoch": 2.97, "grad_norm": 1.394156585320726, "learning_rate": 3.4474502421005805e-09, "loss": 0.0428, "step": 10863 }, { "epoch": 2.97, "grad_norm": 1.6725504248972054, "learning_rate": 3.3929521234188358e-09, "loss": 0.0474, "step": 10864 }, { "epoch": 2.97, "grad_norm": 1.5900750531295955, "learning_rate": 3.338888056989009e-09, "loss": 0.0493, "step": 10865 }, { "epoch": 2.97, "grad_norm": 1.53002408772992, "learning_rate": 3.2852580475078997e-09, "loss": 0.0458, "step": 10866 }, { "epoch": 2.97, "grad_norm": 1.8165536663958906, "learning_rate": 3.2320620996345586e-09, "loss": 0.0443, "step": 10867 }, { "epoch": 2.97, "grad_norm": 1.4582502274869606, "learning_rate": 3.1793002179897337e-09, "loss": 0.0421, "step": 10868 }, { "epoch": 2.97, "grad_norm": 1.6026475678728451, "learning_rate": 3.126972407156981e-09, "loss": 0.0518, "step": 10869 }, { "epoch": 2.97, "grad_norm": 1.3202264582545662, "learning_rate": 3.075078671682108e-09, "loss": 0.0331, "step": 10870 }, { "epoch": 2.97, "grad_norm": 1.504591880791957, "learning_rate": 3.023619016072621e-09, "loss": 0.043, "step": 10871 }, { "epoch": 2.97, "grad_norm": 1.6565918996954974, "learning_rate": 2.9725934447993875e-09, "loss": 0.0497, "step": 10872 }, { "epoch": 2.97, "grad_norm": 1.4107936064471835, "learning_rate": 2.9220019622944184e-09, "loss": 0.037, "step": 10873 }, { "epoch": 2.97, "grad_norm": 1.606013548084908, "learning_rate": 2.8718445729530862e-09, "loss": 0.044, "step": 10874 }, { "epoch": 2.97, "grad_norm": 1.2578276030307272, "learning_rate": 2.8221212811324616e-09, "loss": 0.039, "step": 10875 }, { "epoch": 2.97, "grad_norm": 1.49840643551066, "learning_rate": 2.772832091151312e-09, "loss": 0.0453, "step": 10876 }, { "epoch": 2.97, "grad_norm": 1.3789989912753413, "learning_rate": 2.7239770072923223e-09, "loss": 0.0411, "step": 10877 }, { "epoch": 2.97, "grad_norm": 1.5225099461071243, "learning_rate": 2.675556033798765e-09, "loss": 0.0494, "step": 10878 }, { "epoch": 2.97, "grad_norm": 1.2759255895957398, "learning_rate": 2.6275691748767207e-09, "loss": 0.0405, "step": 10879 }, { "epoch": 2.97, "grad_norm": 1.457528019426865, "learning_rate": 2.5800164346961864e-09, "loss": 0.0442, "step": 10880 }, { "epoch": 2.97, "grad_norm": 1.5310093899628099, "learning_rate": 2.532897817386637e-09, "loss": 0.0456, "step": 10881 }, { "epoch": 2.97, "grad_norm": 1.5675491949601992, "learning_rate": 2.4862133270414644e-09, "loss": 0.0407, "step": 10882 }, { "epoch": 2.97, "grad_norm": 1.5227049475229266, "learning_rate": 2.439962967716869e-09, "loss": 0.0499, "step": 10883 }, { "epoch": 2.97, "grad_norm": 1.3894211815770774, "learning_rate": 2.3941467434296372e-09, "loss": 0.0443, "step": 10884 }, { "epoch": 2.97, "grad_norm": 1.3684873567453495, "learning_rate": 2.348764658160474e-09, "loss": 0.0398, "step": 10885 }, { "epoch": 2.97, "grad_norm": 1.5040855349948603, "learning_rate": 2.303816715851781e-09, "loss": 0.0423, "step": 10886 }, { "epoch": 2.97, "grad_norm": 1.8301923480515854, "learning_rate": 2.2593029204076578e-09, "loss": 0.0541, "step": 10887 }, { "epoch": 2.97, "grad_norm": 1.479666917008766, "learning_rate": 2.215223275695011e-09, "loss": 0.0478, "step": 10888 }, { "epoch": 2.97, "grad_norm": 1.553745389209031, "learning_rate": 2.171577785543e-09, "loss": 0.0466, "step": 10889 }, { "epoch": 2.97, "grad_norm": 1.4864910519163095, "learning_rate": 2.128366453743591e-09, "loss": 0.0395, "step": 10890 }, { "epoch": 2.97, "grad_norm": 1.3934181250881392, "learning_rate": 2.085589284050449e-09, "loss": 0.0427, "step": 10891 }, { "epoch": 2.97, "grad_norm": 1.2006737840195014, "learning_rate": 2.0432462801789344e-09, "loss": 0.0365, "step": 10892 }, { "epoch": 2.97, "grad_norm": 1.497205629063281, "learning_rate": 2.0013374458077718e-09, "loss": 0.0418, "step": 10893 }, { "epoch": 2.97, "grad_norm": 1.34771897200011, "learning_rate": 1.959862784577937e-09, "loss": 0.0376, "step": 10894 }, { "epoch": 2.97, "grad_norm": 1.7462236523832473, "learning_rate": 1.9188223000915496e-09, "loss": 0.0551, "step": 10895 }, { "epoch": 2.97, "grad_norm": 1.5678687103542155, "learning_rate": 1.8782159959140898e-09, "loss": 0.0457, "step": 10896 }, { "epoch": 2.97, "grad_norm": 1.5797821269564256, "learning_rate": 1.8380438755738472e-09, "loss": 0.0452, "step": 10897 }, { "epoch": 2.98, "grad_norm": 1.4492515090472524, "learning_rate": 1.798305942559142e-09, "loss": 0.041, "step": 10898 }, { "epoch": 2.98, "grad_norm": 1.5478121971483612, "learning_rate": 1.759002200322768e-09, "loss": 0.0433, "step": 10899 }, { "epoch": 2.98, "grad_norm": 1.5204610573668493, "learning_rate": 1.720132652278661e-09, "loss": 0.0477, "step": 10900 }, { "epoch": 2.98, "grad_norm": 1.4364415337562024, "learning_rate": 1.6816973018035642e-09, "loss": 0.0409, "step": 10901 }, { "epoch": 2.98, "grad_norm": 1.7175872077160956, "learning_rate": 1.6436961522364737e-09, "loss": 0.052, "step": 10902 }, { "epoch": 2.98, "grad_norm": 1.8682631936416245, "learning_rate": 1.6061292068786372e-09, "loss": 0.0597, "step": 10903 }, { "epoch": 2.98, "grad_norm": 1.3354585640062273, "learning_rate": 1.5689964689935555e-09, "loss": 0.04, "step": 10904 }, { "epoch": 2.98, "grad_norm": 1.891281490714632, "learning_rate": 1.5322979418058714e-09, "loss": 0.0561, "step": 10905 }, { "epoch": 2.98, "grad_norm": 1.2928408089002037, "learning_rate": 1.4960336285047005e-09, "loss": 0.0349, "step": 10906 }, { "epoch": 2.98, "grad_norm": 1.420691371532058, "learning_rate": 1.4602035322397456e-09, "loss": 0.0422, "step": 10907 }, { "epoch": 2.98, "grad_norm": 1.477545062235664, "learning_rate": 1.424807656124072e-09, "loss": 0.0397, "step": 10908 }, { "epoch": 2.98, "grad_norm": 1.5958664640141942, "learning_rate": 1.3898460032318872e-09, "loss": 0.0537, "step": 10909 }, { "epoch": 2.98, "grad_norm": 1.3871411859594693, "learning_rate": 1.3553185766007616e-09, "loss": 0.0407, "step": 10910 }, { "epoch": 2.98, "grad_norm": 1.3130826320213982, "learning_rate": 1.3212253792299624e-09, "loss": 0.0387, "step": 10911 }, { "epoch": 2.98, "grad_norm": 1.667983640787204, "learning_rate": 1.2875664140804545e-09, "loss": 0.0528, "step": 10912 }, { "epoch": 2.98, "grad_norm": 1.2482181672550523, "learning_rate": 1.2543416840771206e-09, "loss": 0.0434, "step": 10913 }, { "epoch": 2.98, "grad_norm": 1.6023723463161241, "learning_rate": 1.2215511921059852e-09, "loss": 0.0471, "step": 10914 }, { "epoch": 2.98, "grad_norm": 1.3836231954049276, "learning_rate": 1.189194941015326e-09, "loss": 0.039, "step": 10915 }, { "epoch": 2.98, "grad_norm": 1.4885924304484868, "learning_rate": 1.157272933615672e-09, "loss": 0.0522, "step": 10916 }, { "epoch": 2.98, "grad_norm": 1.4915465173248015, "learning_rate": 1.1257851726809154e-09, "loss": 0.0463, "step": 10917 }, { "epoch": 2.98, "grad_norm": 1.351826167693264, "learning_rate": 1.094731660945536e-09, "loss": 0.0334, "step": 10918 }, { "epoch": 2.98, "grad_norm": 1.5812667154922224, "learning_rate": 1.0641124011068205e-09, "loss": 0.0527, "step": 10919 }, { "epoch": 2.98, "grad_norm": 1.594638045291966, "learning_rate": 1.0339273958259732e-09, "loss": 0.0448, "step": 10920 }, { "epoch": 2.98, "grad_norm": 1.3517937994813007, "learning_rate": 1.004176647724231e-09, "loss": 0.0364, "step": 10921 }, { "epoch": 2.98, "grad_norm": 1.5341344236005836, "learning_rate": 9.748601593861929e-10, "loss": 0.0411, "step": 10922 }, { "epoch": 2.98, "grad_norm": 1.4796411696306626, "learning_rate": 9.459779333587104e-10, "loss": 0.0476, "step": 10923 }, { "epoch": 2.98, "grad_norm": 1.5099239128241653, "learning_rate": 9.175299721503328e-10, "loss": 0.0491, "step": 10924 }, { "epoch": 2.98, "grad_norm": 1.5823395724123108, "learning_rate": 8.895162782324163e-10, "loss": 0.0459, "step": 10925 }, { "epoch": 2.98, "grad_norm": 1.5086060090207962, "learning_rate": 8.619368540391248e-10, "loss": 0.038, "step": 10926 }, { "epoch": 2.98, "grad_norm": 1.5210457148475258, "learning_rate": 8.347917019657647e-10, "loss": 0.0468, "step": 10927 }, { "epoch": 2.98, "grad_norm": 1.4443675287545779, "learning_rate": 8.080808243704496e-10, "loss": 0.0444, "step": 10928 }, { "epoch": 2.98, "grad_norm": 1.3457093965889122, "learning_rate": 7.818042235735457e-10, "loss": 0.0398, "step": 10929 }, { "epoch": 2.98, "grad_norm": 1.27019573383568, "learning_rate": 7.559619018576714e-10, "loss": 0.035, "step": 10930 }, { "epoch": 2.98, "grad_norm": 1.7464677059014857, "learning_rate": 7.305538614682528e-10, "loss": 0.0532, "step": 10931 }, { "epoch": 2.98, "grad_norm": 1.5521159457023526, "learning_rate": 7.055801046113031e-10, "loss": 0.0508, "step": 10932 }, { "epoch": 2.98, "grad_norm": 1.7303688372664419, "learning_rate": 6.810406334573084e-10, "loss": 0.0534, "step": 10933 }, { "epoch": 2.98, "grad_norm": 1.355671809820054, "learning_rate": 6.569354501378966e-10, "loss": 0.0383, "step": 10934 }, { "epoch": 2.99, "grad_norm": 1.4943001310646458, "learning_rate": 6.332645567463935e-10, "loss": 0.0392, "step": 10935 }, { "epoch": 2.99, "grad_norm": 1.5269695855631886, "learning_rate": 6.100279553400424e-10, "loss": 0.0448, "step": 10936 }, { "epoch": 2.99, "grad_norm": 1.6549043342421343, "learning_rate": 5.872256479361182e-10, "loss": 0.0401, "step": 10937 }, { "epoch": 2.99, "grad_norm": 1.5735513500680245, "learning_rate": 5.648576365169245e-10, "loss": 0.0384, "step": 10938 }, { "epoch": 2.99, "grad_norm": 1.4356358875042432, "learning_rate": 5.429239230242411e-10, "loss": 0.0465, "step": 10939 }, { "epoch": 2.99, "grad_norm": 1.4486137075123762, "learning_rate": 5.214245093643211e-10, "loss": 0.0406, "step": 10940 }, { "epoch": 2.99, "grad_norm": 1.4488762550065764, "learning_rate": 5.003593974045596e-10, "loss": 0.0406, "step": 10941 }, { "epoch": 2.99, "grad_norm": 1.389571561467369, "learning_rate": 4.797285889746039e-10, "loss": 0.0368, "step": 10942 }, { "epoch": 2.99, "grad_norm": 1.596618911010025, "learning_rate": 4.5953208586690947e-10, "loss": 0.0504, "step": 10943 }, { "epoch": 2.99, "grad_norm": 1.7320840699695563, "learning_rate": 4.397698898361835e-10, "loss": 0.0525, "step": 10944 }, { "epoch": 2.99, "grad_norm": 1.5823792318049978, "learning_rate": 4.2044200259883095e-10, "loss": 0.0496, "step": 10945 }, { "epoch": 2.99, "grad_norm": 1.397425095126363, "learning_rate": 4.0154842583350895e-10, "loss": 0.0474, "step": 10946 }, { "epoch": 2.99, "grad_norm": 1.6356798080497905, "learning_rate": 3.830891611822374e-10, "loss": 0.0496, "step": 10947 }, { "epoch": 2.99, "grad_norm": 1.6431303351633053, "learning_rate": 3.6506421024762315e-10, "loss": 0.0487, "step": 10948 }, { "epoch": 2.99, "grad_norm": 1.4735679237357644, "learning_rate": 3.4747357459674614e-10, "loss": 0.0452, "step": 10949 }, { "epoch": 2.99, "grad_norm": 1.2572602536817783, "learning_rate": 3.303172557561629e-10, "loss": 0.0399, "step": 10950 }, { "epoch": 2.99, "grad_norm": 1.3881766214196973, "learning_rate": 3.1359525521801326e-10, "loss": 0.0346, "step": 10951 }, { "epoch": 2.99, "grad_norm": 1.6869686987053554, "learning_rate": 2.9730757443335867e-10, "loss": 0.0472, "step": 10952 }, { "epoch": 2.99, "grad_norm": 1.7139200856000782, "learning_rate": 2.814542148177335e-10, "loss": 0.0458, "step": 10953 }, { "epoch": 2.99, "grad_norm": 1.534488477017455, "learning_rate": 2.660351777483694e-10, "loss": 0.0435, "step": 10954 }, { "epoch": 2.99, "grad_norm": 1.624282335540079, "learning_rate": 2.5105046456475047e-10, "loss": 0.0499, "step": 10955 }, { "epoch": 2.99, "grad_norm": 1.3362921800439473, "learning_rate": 2.3650007656805804e-10, "loss": 0.0421, "step": 10956 }, { "epoch": 2.99, "grad_norm": 1.4230969674878908, "learning_rate": 2.2238401502339136e-10, "loss": 0.0468, "step": 10957 }, { "epoch": 2.99, "grad_norm": 1.844619776161194, "learning_rate": 2.0870228115588142e-10, "loss": 0.0503, "step": 10958 }, { "epoch": 2.99, "grad_norm": 1.430522189451049, "learning_rate": 1.9545487615402204e-10, "loss": 0.0407, "step": 10959 }, { "epoch": 2.99, "grad_norm": 1.3522080482763055, "learning_rate": 1.8264180116966956e-10, "loss": 0.0386, "step": 10960 }, { "epoch": 2.99, "grad_norm": 1.5747266886203286, "learning_rate": 1.702630573152675e-10, "loss": 0.0372, "step": 10961 }, { "epoch": 2.99, "grad_norm": 1.3986460141251411, "learning_rate": 1.583186456660668e-10, "loss": 0.0464, "step": 10962 }, { "epoch": 2.99, "grad_norm": 1.435492933928032, "learning_rate": 1.4680856725957094e-10, "loss": 0.0384, "step": 10963 }, { "epoch": 2.99, "grad_norm": 1.2943468033829362, "learning_rate": 1.3573282309609082e-10, "loss": 0.0382, "step": 10964 }, { "epoch": 2.99, "grad_norm": 1.4257102002799198, "learning_rate": 1.250914141370796e-10, "loss": 0.0409, "step": 10965 }, { "epoch": 2.99, "grad_norm": 1.5938799952554827, "learning_rate": 1.1488434130790815e-10, "loss": 0.0462, "step": 10966 }, { "epoch": 2.99, "grad_norm": 1.5987602633605487, "learning_rate": 1.0511160549453448e-10, "loss": 0.0489, "step": 10967 }, { "epoch": 2.99, "grad_norm": 1.786183730671755, "learning_rate": 9.577320754627917e-11, "loss": 0.0528, "step": 10968 }, { "epoch": 2.99, "grad_norm": 1.46937398014273, "learning_rate": 8.686914827416015e-11, "loss": 0.0483, "step": 10969 }, { "epoch": 2.99, "grad_norm": 1.1937786460409998, "learning_rate": 7.839942845144777e-11, "loss": 0.0341, "step": 10970 }, { "epoch": 3.0, "grad_norm": 1.44079617756281, "learning_rate": 7.036404881421988e-11, "loss": 0.0434, "step": 10971 }, { "epoch": 3.0, "grad_norm": 1.7941135845578364, "learning_rate": 6.276301006080676e-11, "loss": 0.0495, "step": 10972 }, { "epoch": 3.0, "grad_norm": 1.5234947691103542, "learning_rate": 5.55963128506809e-11, "loss": 0.0427, "step": 10973 }, { "epoch": 3.0, "grad_norm": 1.445840999807596, "learning_rate": 4.886395780723252e-11, "loss": 0.0334, "step": 10974 }, { "epoch": 3.0, "grad_norm": 1.550023526099395, "learning_rate": 4.256594551499405e-11, "loss": 0.0411, "step": 10975 }, { "epoch": 3.0, "grad_norm": 1.3735763245528048, "learning_rate": 3.6702276520750316e-11, "loss": 0.0434, "step": 10976 }, { "epoch": 3.0, "grad_norm": 1.6566842345750339, "learning_rate": 3.127295133409369e-11, "loss": 0.041, "step": 10977 }, { "epoch": 3.0, "grad_norm": 1.3697366871544565, "learning_rate": 2.6277970426868972e-11, "loss": 0.0471, "step": 10978 }, { "epoch": 3.0, "grad_norm": 1.3988212399486728, "learning_rate": 2.1717334232618235e-11, "loss": 0.0416, "step": 10979 }, { "epoch": 3.0, "grad_norm": 1.6106847688772814, "learning_rate": 1.7591043148246222e-11, "loss": 0.0551, "step": 10980 }, { "epoch": 3.0, "grad_norm": 1.5175493867414471, "learning_rate": 1.3899097531244744e-11, "loss": 0.0467, "step": 10981 }, { "epoch": 3.0, "grad_norm": 1.883069086105246, "learning_rate": 1.0641497703023363e-11, "loss": 0.0578, "step": 10982 }, { "epoch": 3.0, "grad_norm": 1.5729843921762448, "learning_rate": 7.818243946133841e-12, "loss": 0.0428, "step": 10983 }, { "epoch": 3.0, "grad_norm": 1.5778767449570963, "learning_rate": 5.4293365059354676e-12, "loss": 0.0438, "step": 10984 }, { "epoch": 3.0, "grad_norm": 1.6096525037457174, "learning_rate": 3.474775590039947e-12, "loss": 0.0407, "step": 10985 }, { "epoch": 3.0, "grad_norm": 1.584474226349137, "learning_rate": 1.9545613683114027e-12, "loss": 0.0505, "step": 10986 }, { "epoch": 3.0, "grad_norm": 1.4404310764741348, "learning_rate": 8.686939728663746e-13, "loss": 0.0386, "step": 10987 }, { "epoch": 3.0, "grad_norm": 1.524871018292121, "learning_rate": 2.1717349807381937e-13, "loss": 0.0404, "step": 10988 }, { "epoch": 3.0, "grad_norm": 1.3380365262766343, "learning_rate": 0.0, "loss": 0.0335, "step": 10989 }, { "epoch": 3.0, "step": 10989, "total_flos": 1316175770025984.0, "train_loss": 0.13379205959944065, "train_runtime": 24670.7836, "train_samples_per_second": 28.505, "train_steps_per_second": 0.445 } ], "logging_steps": 1.0, "max_steps": 10989, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 24000, "total_flos": 1316175770025984.0, "train_batch_size": 16, "trial_name": null, "trial_params": null }