{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 6198, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00016134236850596966, "grad_norm": 5.788793972032065, "learning_rate": 3.225806451612903e-09, "loss": 1.9956, "step": 1 }, { "epoch": 0.00032268473701193933, "grad_norm": 4.520575426658685, "learning_rate": 6.451612903225806e-09, "loss": 1.4105, "step": 2 }, { "epoch": 0.000484027105517909, "grad_norm": 4.563518462753021, "learning_rate": 9.677419354838709e-09, "loss": 1.7146, "step": 3 }, { "epoch": 0.0006453694740238787, "grad_norm": 4.712714370991914, "learning_rate": 1.2903225806451612e-08, "loss": 1.6855, "step": 4 }, { "epoch": 0.0008067118425298483, "grad_norm": 6.5548481245217936, "learning_rate": 1.6129032258064514e-08, "loss": 1.9005, "step": 5 }, { "epoch": 0.000968054211035818, "grad_norm": 7.630702403523713, "learning_rate": 1.9354838709677418e-08, "loss": 1.6795, "step": 6 }, { "epoch": 0.0011293965795417876, "grad_norm": 17.38725540131694, "learning_rate": 2.258064516129032e-08, "loss": 1.8937, "step": 7 }, { "epoch": 0.0012907389480477573, "grad_norm": 4.8782247979111295, "learning_rate": 2.5806451612903225e-08, "loss": 1.7892, "step": 8 }, { "epoch": 0.001452081316553727, "grad_norm": 4.679698708445989, "learning_rate": 2.9032258064516128e-08, "loss": 1.9711, "step": 9 }, { "epoch": 0.0016134236850596966, "grad_norm": 5.371314093491684, "learning_rate": 3.225806451612903e-08, "loss": 1.8013, "step": 10 }, { "epoch": 0.0017747660535656663, "grad_norm": 3.541591793091638, "learning_rate": 3.548387096774193e-08, "loss": 1.5029, "step": 11 }, { "epoch": 0.001936108422071636, "grad_norm": 3.7768020320334985, "learning_rate": 3.8709677419354835e-08, "loss": 1.7048, "step": 12 }, { "epoch": 0.002097450790577606, "grad_norm": 4.211285804965452, "learning_rate": 4.1935483870967746e-08, "loss": 1.6643, "step": 13 }, { "epoch": 0.002258793159083575, "grad_norm": 5.544287193445018, "learning_rate": 4.516129032258064e-08, "loss": 1.8919, "step": 14 }, { "epoch": 0.002420135527589545, "grad_norm": 3.813999819054922, "learning_rate": 4.8387096774193546e-08, "loss": 1.5513, "step": 15 }, { "epoch": 0.0025814778960955146, "grad_norm": 3.9360085947749264, "learning_rate": 5.161290322580645e-08, "loss": 1.693, "step": 16 }, { "epoch": 0.0027428202646014844, "grad_norm": 4.744444157771375, "learning_rate": 5.483870967741935e-08, "loss": 1.8296, "step": 17 }, { "epoch": 0.002904162633107454, "grad_norm": 4.389236610791528, "learning_rate": 5.8064516129032257e-08, "loss": 1.9921, "step": 18 }, { "epoch": 0.003065505001613424, "grad_norm": 4.8847515681025255, "learning_rate": 6.129032258064515e-08, "loss": 1.7721, "step": 19 }, { "epoch": 0.003226847370119393, "grad_norm": 11.146895560632496, "learning_rate": 6.451612903225806e-08, "loss": 2.0095, "step": 20 }, { "epoch": 0.003388189738625363, "grad_norm": 3.627144540923611, "learning_rate": 6.774193548387097e-08, "loss": 1.4996, "step": 21 }, { "epoch": 0.0035495321071313327, "grad_norm": 4.564206574963007, "learning_rate": 7.096774193548386e-08, "loss": 1.5905, "step": 22 }, { "epoch": 0.0037108744756373024, "grad_norm": 3.881697005178875, "learning_rate": 7.419354838709677e-08, "loss": 1.8742, "step": 23 }, { "epoch": 0.003872216844143272, "grad_norm": 4.611839747533905, "learning_rate": 7.741935483870967e-08, "loss": 1.7792, "step": 24 }, { "epoch": 0.004033559212649242, "grad_norm": 5.3091592494513815, "learning_rate": 8.064516129032257e-08, "loss": 2.0078, "step": 25 }, { "epoch": 0.004194901581155212, "grad_norm": 4.426390374496642, "learning_rate": 8.387096774193549e-08, "loss": 1.7993, "step": 26 }, { "epoch": 0.004356243949661181, "grad_norm": 3.657478061070485, "learning_rate": 8.709677419354838e-08, "loss": 1.6662, "step": 27 }, { "epoch": 0.00451758631816715, "grad_norm": 3.991125155825165, "learning_rate": 9.032258064516128e-08, "loss": 1.6518, "step": 28 }, { "epoch": 0.00467892868667312, "grad_norm": 4.854913853235592, "learning_rate": 9.354838709677419e-08, "loss": 1.8567, "step": 29 }, { "epoch": 0.00484027105517909, "grad_norm": 4.060864632904237, "learning_rate": 9.677419354838709e-08, "loss": 1.7893, "step": 30 }, { "epoch": 0.0050016134236850595, "grad_norm": 3.9443844147414033, "learning_rate": 1e-07, "loss": 1.7545, "step": 31 }, { "epoch": 0.005162955792191029, "grad_norm": 4.9697001766284705, "learning_rate": 1.032258064516129e-07, "loss": 1.719, "step": 32 }, { "epoch": 0.005324298160696999, "grad_norm": 4.733645946405672, "learning_rate": 1.064516129032258e-07, "loss": 1.8155, "step": 33 }, { "epoch": 0.005485640529202969, "grad_norm": 8.842423086740528, "learning_rate": 1.096774193548387e-07, "loss": 1.8661, "step": 34 }, { "epoch": 0.0056469828977089385, "grad_norm": 3.6231720360372304, "learning_rate": 1.1290322580645161e-07, "loss": 1.3051, "step": 35 }, { "epoch": 0.005808325266214908, "grad_norm": 5.702007863617676, "learning_rate": 1.1612903225806451e-07, "loss": 1.8527, "step": 36 }, { "epoch": 0.005969667634720878, "grad_norm": 5.260571689650577, "learning_rate": 1.1935483870967742e-07, "loss": 2.0233, "step": 37 }, { "epoch": 0.006131010003226848, "grad_norm": 9.95996012361809, "learning_rate": 1.225806451612903e-07, "loss": 1.7453, "step": 38 }, { "epoch": 0.0062923523717328175, "grad_norm": 4.94578745219999, "learning_rate": 1.258064516129032e-07, "loss": 2.0631, "step": 39 }, { "epoch": 0.006453694740238786, "grad_norm": 8.493225988710797, "learning_rate": 1.2903225806451611e-07, "loss": 1.7374, "step": 40 }, { "epoch": 0.006615037108744756, "grad_norm": 5.850671269955442, "learning_rate": 1.3225806451612903e-07, "loss": 1.6602, "step": 41 }, { "epoch": 0.006776379477250726, "grad_norm": 3.6691078960267096, "learning_rate": 1.3548387096774195e-07, "loss": 1.4596, "step": 42 }, { "epoch": 0.006937721845756696, "grad_norm": 5.208085545049703, "learning_rate": 1.3870967741935484e-07, "loss": 1.8263, "step": 43 }, { "epoch": 0.007099064214262665, "grad_norm": 4.966246352022772, "learning_rate": 1.4193548387096773e-07, "loss": 1.8818, "step": 44 }, { "epoch": 0.007260406582768635, "grad_norm": 7.385889838968552, "learning_rate": 1.4516129032258064e-07, "loss": 1.6078, "step": 45 }, { "epoch": 0.007421748951274605, "grad_norm": 5.36499440615061, "learning_rate": 1.4838709677419353e-07, "loss": 1.5422, "step": 46 }, { "epoch": 0.0075830913197805746, "grad_norm": 6.41429474162754, "learning_rate": 1.5161290322580645e-07, "loss": 1.8462, "step": 47 }, { "epoch": 0.007744433688286544, "grad_norm": 3.543511228237607, "learning_rate": 1.5483870967741934e-07, "loss": 1.6753, "step": 48 }, { "epoch": 0.007905776056792513, "grad_norm": 3.28812509257946, "learning_rate": 1.5806451612903223e-07, "loss": 1.6117, "step": 49 }, { "epoch": 0.008067118425298484, "grad_norm": 5.203095123846753, "learning_rate": 1.6129032258064515e-07, "loss": 2.2053, "step": 50 }, { "epoch": 0.008228460793804453, "grad_norm": 3.859035708690987, "learning_rate": 1.6451612903225807e-07, "loss": 1.5459, "step": 51 }, { "epoch": 0.008389803162310423, "grad_norm": 4.33281731589786, "learning_rate": 1.6774193548387098e-07, "loss": 1.7748, "step": 52 }, { "epoch": 0.008551145530816392, "grad_norm": 4.697186037527423, "learning_rate": 1.7096774193548385e-07, "loss": 1.706, "step": 53 }, { "epoch": 0.008712487899322363, "grad_norm": 3.6560045469660625, "learning_rate": 1.7419354838709676e-07, "loss": 1.9459, "step": 54 }, { "epoch": 0.008873830267828332, "grad_norm": 3.592033440670006, "learning_rate": 1.7741935483870968e-07, "loss": 1.5217, "step": 55 }, { "epoch": 0.0090351726363343, "grad_norm": 4.688754918282232, "learning_rate": 1.8064516129032257e-07, "loss": 1.8734, "step": 56 }, { "epoch": 0.009196515004840271, "grad_norm": 5.5257585928199076, "learning_rate": 1.8387096774193546e-07, "loss": 1.5954, "step": 57 }, { "epoch": 0.00935785737334624, "grad_norm": 4.973076238523789, "learning_rate": 1.8709677419354838e-07, "loss": 1.922, "step": 58 }, { "epoch": 0.00951919974185221, "grad_norm": 3.2635412549601925, "learning_rate": 1.9032258064516127e-07, "loss": 1.5218, "step": 59 }, { "epoch": 0.00968054211035818, "grad_norm": 4.012879616911554, "learning_rate": 1.9354838709677418e-07, "loss": 1.6178, "step": 60 }, { "epoch": 0.00984188447886415, "grad_norm": 6.363187729515348, "learning_rate": 1.967741935483871e-07, "loss": 1.8973, "step": 61 }, { "epoch": 0.010003226847370119, "grad_norm": 6.1506622826239274, "learning_rate": 2e-07, "loss": 1.8299, "step": 62 }, { "epoch": 0.01016456921587609, "grad_norm": 3.613631607802824, "learning_rate": 2.0322580645161288e-07, "loss": 1.8069, "step": 63 }, { "epoch": 0.010325911584382058, "grad_norm": 4.465723039579968, "learning_rate": 2.064516129032258e-07, "loss": 1.7449, "step": 64 }, { "epoch": 0.010487253952888029, "grad_norm": 3.4736105741486627, "learning_rate": 2.0967741935483871e-07, "loss": 1.5098, "step": 65 }, { "epoch": 0.010648596321393998, "grad_norm": 3.497019179630017, "learning_rate": 2.129032258064516e-07, "loss": 1.623, "step": 66 }, { "epoch": 0.010809938689899969, "grad_norm": 4.089446623284425, "learning_rate": 2.161290322580645e-07, "loss": 1.5099, "step": 67 }, { "epoch": 0.010971281058405937, "grad_norm": 3.6965780208222143, "learning_rate": 2.193548387096774e-07, "loss": 1.3635, "step": 68 }, { "epoch": 0.011132623426911906, "grad_norm": 4.820157980644828, "learning_rate": 2.2258064516129033e-07, "loss": 1.669, "step": 69 }, { "epoch": 0.011293965795417877, "grad_norm": 6.611779607460969, "learning_rate": 2.2580645161290322e-07, "loss": 1.6914, "step": 70 }, { "epoch": 0.011455308163923846, "grad_norm": 4.270050771085337, "learning_rate": 2.290322580645161e-07, "loss": 1.7723, "step": 71 }, { "epoch": 0.011616650532429816, "grad_norm": 3.3168145637188786, "learning_rate": 2.3225806451612903e-07, "loss": 1.7036, "step": 72 }, { "epoch": 0.011777992900935785, "grad_norm": 12.202493531597215, "learning_rate": 2.3548387096774192e-07, "loss": 1.9771, "step": 73 }, { "epoch": 0.011939335269441756, "grad_norm": 3.8898189582961713, "learning_rate": 2.3870967741935483e-07, "loss": 1.5985, "step": 74 }, { "epoch": 0.012100677637947725, "grad_norm": 9.283573566207126, "learning_rate": 2.4193548387096775e-07, "loss": 1.589, "step": 75 }, { "epoch": 0.012262020006453695, "grad_norm": 4.117135613999435, "learning_rate": 2.451612903225806e-07, "loss": 1.6724, "step": 76 }, { "epoch": 0.012423362374959664, "grad_norm": 3.2333605368607015, "learning_rate": 2.4838709677419353e-07, "loss": 1.4483, "step": 77 }, { "epoch": 0.012584704743465635, "grad_norm": 4.244854224120447, "learning_rate": 2.516129032258064e-07, "loss": 1.7895, "step": 78 }, { "epoch": 0.012746047111971604, "grad_norm": 3.8086538456797223, "learning_rate": 2.548387096774193e-07, "loss": 1.6483, "step": 79 }, { "epoch": 0.012907389480477573, "grad_norm": 3.6868589458255077, "learning_rate": 2.5806451612903223e-07, "loss": 1.4, "step": 80 }, { "epoch": 0.013068731848983543, "grad_norm": 9.258873472300733, "learning_rate": 2.6129032258064514e-07, "loss": 1.7886, "step": 81 }, { "epoch": 0.013230074217489512, "grad_norm": 3.6641840477407484, "learning_rate": 2.6451612903225806e-07, "loss": 1.4921, "step": 82 }, { "epoch": 0.013391416585995483, "grad_norm": 3.493591026551795, "learning_rate": 2.67741935483871e-07, "loss": 1.5781, "step": 83 }, { "epoch": 0.013552758954501452, "grad_norm": 3.570209393170656, "learning_rate": 2.709677419354839e-07, "loss": 1.4227, "step": 84 }, { "epoch": 0.013714101323007422, "grad_norm": 3.589992781607934, "learning_rate": 2.7419354838709676e-07, "loss": 1.3303, "step": 85 }, { "epoch": 0.013875443691513391, "grad_norm": 5.365510414779604, "learning_rate": 2.774193548387097e-07, "loss": 1.6217, "step": 86 }, { "epoch": 0.014036786060019362, "grad_norm": 5.819765533758205, "learning_rate": 2.8064516129032254e-07, "loss": 1.3192, "step": 87 }, { "epoch": 0.01419812842852533, "grad_norm": 2.69273939182415, "learning_rate": 2.8387096774193546e-07, "loss": 1.3804, "step": 88 }, { "epoch": 0.014359470797031301, "grad_norm": 4.018052373109001, "learning_rate": 2.8709677419354837e-07, "loss": 1.617, "step": 89 }, { "epoch": 0.01452081316553727, "grad_norm": 2.722501074420307, "learning_rate": 2.903225806451613e-07, "loss": 1.3739, "step": 90 }, { "epoch": 0.014682155534043239, "grad_norm": 3.0037976705425153, "learning_rate": 2.9354838709677415e-07, "loss": 1.3558, "step": 91 }, { "epoch": 0.01484349790254921, "grad_norm": 2.9071242135368296, "learning_rate": 2.9677419354838707e-07, "loss": 1.3904, "step": 92 }, { "epoch": 0.015004840271055178, "grad_norm": 2.9460493628153164, "learning_rate": 3e-07, "loss": 1.6165, "step": 93 }, { "epoch": 0.015166182639561149, "grad_norm": 2.9300566173720703, "learning_rate": 3.032258064516129e-07, "loss": 1.2695, "step": 94 }, { "epoch": 0.015327525008067118, "grad_norm": 3.3191575139601355, "learning_rate": 3.064516129032258e-07, "loss": 1.2265, "step": 95 }, { "epoch": 0.015488867376573089, "grad_norm": 7.997256046830341, "learning_rate": 3.096774193548387e-07, "loss": 1.5263, "step": 96 }, { "epoch": 0.01565020974507906, "grad_norm": 8.788723083723983, "learning_rate": 3.129032258064516e-07, "loss": 1.361, "step": 97 }, { "epoch": 0.015811552113585026, "grad_norm": 4.301097889399777, "learning_rate": 3.1612903225806446e-07, "loss": 1.4139, "step": 98 }, { "epoch": 0.015972894482090997, "grad_norm": 2.624513126816483, "learning_rate": 3.193548387096774e-07, "loss": 1.2086, "step": 99 }, { "epoch": 0.016134236850596968, "grad_norm": 2.7065106836795567, "learning_rate": 3.225806451612903e-07, "loss": 1.2114, "step": 100 }, { "epoch": 0.016295579219102935, "grad_norm": 3.41210350555806, "learning_rate": 3.258064516129032e-07, "loss": 1.1313, "step": 101 }, { "epoch": 0.016456921587608905, "grad_norm": 2.9645240018210424, "learning_rate": 3.2903225806451613e-07, "loss": 1.2837, "step": 102 }, { "epoch": 0.016618263956114876, "grad_norm": 4.245910472125465, "learning_rate": 3.3225806451612905e-07, "loss": 1.1694, "step": 103 }, { "epoch": 0.016779606324620847, "grad_norm": 3.6200851797124916, "learning_rate": 3.3548387096774196e-07, "loss": 1.3824, "step": 104 }, { "epoch": 0.016940948693126814, "grad_norm": 3.1907932995562662, "learning_rate": 3.387096774193548e-07, "loss": 1.1224, "step": 105 }, { "epoch": 0.017102291061632784, "grad_norm": 3.2099396948928236, "learning_rate": 3.419354838709677e-07, "loss": 1.3917, "step": 106 }, { "epoch": 0.017263633430138755, "grad_norm": 5.458387748311133, "learning_rate": 3.451612903225806e-07, "loss": 1.163, "step": 107 }, { "epoch": 0.017424975798644726, "grad_norm": 5.9412550446119505, "learning_rate": 3.483870967741935e-07, "loss": 1.4185, "step": 108 }, { "epoch": 0.017586318167150693, "grad_norm": 2.6027443523922806, "learning_rate": 3.5161290322580644e-07, "loss": 1.3595, "step": 109 }, { "epoch": 0.017747660535656663, "grad_norm": 3.576401474487151, "learning_rate": 3.5483870967741936e-07, "loss": 1.3424, "step": 110 }, { "epoch": 0.017909002904162634, "grad_norm": 3.3982082245755985, "learning_rate": 3.580645161290323e-07, "loss": 1.1676, "step": 111 }, { "epoch": 0.0180703452726686, "grad_norm": 3.3042957263064707, "learning_rate": 3.6129032258064514e-07, "loss": 1.2925, "step": 112 }, { "epoch": 0.01823168764117457, "grad_norm": 3.0495487317045806, "learning_rate": 3.6451612903225806e-07, "loss": 1.4959, "step": 113 }, { "epoch": 0.018393030009680542, "grad_norm": 3.9766507541778324, "learning_rate": 3.677419354838709e-07, "loss": 1.33, "step": 114 }, { "epoch": 0.018554372378186513, "grad_norm": 3.8063875821809385, "learning_rate": 3.7096774193548384e-07, "loss": 0.8667, "step": 115 }, { "epoch": 0.01871571474669248, "grad_norm": 3.032683672519829, "learning_rate": 3.7419354838709675e-07, "loss": 1.3348, "step": 116 }, { "epoch": 0.01887705711519845, "grad_norm": 3.4278896198818525, "learning_rate": 3.7741935483870967e-07, "loss": 1.1034, "step": 117 }, { "epoch": 0.01903839948370442, "grad_norm": 3.5796670714136374, "learning_rate": 3.8064516129032253e-07, "loss": 1.3067, "step": 118 }, { "epoch": 0.019199741852210392, "grad_norm": 2.91142403042716, "learning_rate": 3.8387096774193545e-07, "loss": 1.5945, "step": 119 }, { "epoch": 0.01936108422071636, "grad_norm": 3.032819204111762, "learning_rate": 3.8709677419354837e-07, "loss": 1.1688, "step": 120 }, { "epoch": 0.01952242658922233, "grad_norm": 4.770405157384495, "learning_rate": 3.903225806451613e-07, "loss": 1.3691, "step": 121 }, { "epoch": 0.0196837689577283, "grad_norm": 3.7509157969889264, "learning_rate": 3.935483870967742e-07, "loss": 1.4908, "step": 122 }, { "epoch": 0.01984511132623427, "grad_norm": 2.4967982770951136, "learning_rate": 3.967741935483871e-07, "loss": 0.9454, "step": 123 }, { "epoch": 0.020006453694740238, "grad_norm": 4.067112812729655, "learning_rate": 4e-07, "loss": 1.1973, "step": 124 }, { "epoch": 0.02016779606324621, "grad_norm": 8.29668603024561, "learning_rate": 4.0322580645161285e-07, "loss": 1.2088, "step": 125 }, { "epoch": 0.02032913843175218, "grad_norm": 6.549625811189641, "learning_rate": 4.0645161290322576e-07, "loss": 1.036, "step": 126 }, { "epoch": 0.020490480800258146, "grad_norm": 2.2295143236622823, "learning_rate": 4.096774193548387e-07, "loss": 1.2679, "step": 127 }, { "epoch": 0.020651823168764117, "grad_norm": 3.873678658915716, "learning_rate": 4.129032258064516e-07, "loss": 1.2749, "step": 128 }, { "epoch": 0.020813165537270088, "grad_norm": 2.465207225763098, "learning_rate": 4.161290322580645e-07, "loss": 1.0776, "step": 129 }, { "epoch": 0.020974507905776058, "grad_norm": 3.106069984055351, "learning_rate": 4.1935483870967743e-07, "loss": 1.1162, "step": 130 }, { "epoch": 0.021135850274282025, "grad_norm": 2.9618276098597205, "learning_rate": 4.2258064516129035e-07, "loss": 1.1104, "step": 131 }, { "epoch": 0.021297192642787996, "grad_norm": 2.718449279991172, "learning_rate": 4.258064516129032e-07, "loss": 0.9403, "step": 132 }, { "epoch": 0.021458535011293967, "grad_norm": 2.620390250429252, "learning_rate": 4.290322580645161e-07, "loss": 0.8513, "step": 133 }, { "epoch": 0.021619877379799937, "grad_norm": 4.426329185727695, "learning_rate": 4.32258064516129e-07, "loss": 1.0885, "step": 134 }, { "epoch": 0.021781219748305904, "grad_norm": 2.392305568818672, "learning_rate": 4.354838709677419e-07, "loss": 1.1057, "step": 135 }, { "epoch": 0.021942562116811875, "grad_norm": 2.8405656697169683, "learning_rate": 4.387096774193548e-07, "loss": 1.0332, "step": 136 }, { "epoch": 0.022103904485317846, "grad_norm": 4.502597059424818, "learning_rate": 4.4193548387096774e-07, "loss": 1.1407, "step": 137 }, { "epoch": 0.022265246853823813, "grad_norm": 3.1919236248219938, "learning_rate": 4.4516129032258066e-07, "loss": 1.1414, "step": 138 }, { "epoch": 0.022426589222329783, "grad_norm": 2.210762151767352, "learning_rate": 4.483870967741935e-07, "loss": 0.668, "step": 139 }, { "epoch": 0.022587931590835754, "grad_norm": 3.026937661775737, "learning_rate": 4.5161290322580644e-07, "loss": 1.1889, "step": 140 }, { "epoch": 0.022749273959341725, "grad_norm": 2.5895111192581264, "learning_rate": 4.5483870967741935e-07, "loss": 0.9892, "step": 141 }, { "epoch": 0.02291061632784769, "grad_norm": 2.490195407936544, "learning_rate": 4.580645161290322e-07, "loss": 1.0482, "step": 142 }, { "epoch": 0.023071958696353662, "grad_norm": 2.6662829142853557, "learning_rate": 4.6129032258064514e-07, "loss": 1.0667, "step": 143 }, { "epoch": 0.023233301064859633, "grad_norm": 2.7960692369186466, "learning_rate": 4.6451612903225805e-07, "loss": 0.9262, "step": 144 }, { "epoch": 0.023394643433365604, "grad_norm": 3.138258920504174, "learning_rate": 4.677419354838709e-07, "loss": 1.0709, "step": 145 }, { "epoch": 0.02355598580187157, "grad_norm": 3.783226064318038, "learning_rate": 4.7096774193548383e-07, "loss": 1.2286, "step": 146 }, { "epoch": 0.02371732817037754, "grad_norm": 2.829992531271201, "learning_rate": 4.7419354838709675e-07, "loss": 0.9988, "step": 147 }, { "epoch": 0.023878670538883512, "grad_norm": 2.0789461736275365, "learning_rate": 4.774193548387097e-07, "loss": 0.9081, "step": 148 }, { "epoch": 0.02404001290738948, "grad_norm": 2.6463109483939333, "learning_rate": 4.806451612903226e-07, "loss": 1.1732, "step": 149 }, { "epoch": 0.02420135527589545, "grad_norm": 2.353964355237772, "learning_rate": 4.838709677419355e-07, "loss": 0.7956, "step": 150 }, { "epoch": 0.02436269764440142, "grad_norm": 6.758143281283075, "learning_rate": 4.870967741935484e-07, "loss": 0.8824, "step": 151 }, { "epoch": 0.02452404001290739, "grad_norm": 2.154280052914829, "learning_rate": 4.903225806451612e-07, "loss": 1.1576, "step": 152 }, { "epoch": 0.024685382381413358, "grad_norm": 2.4311339559049303, "learning_rate": 4.935483870967741e-07, "loss": 0.9638, "step": 153 }, { "epoch": 0.02484672474991933, "grad_norm": 2.495892965870636, "learning_rate": 4.967741935483871e-07, "loss": 0.9736, "step": 154 }, { "epoch": 0.0250080671184253, "grad_norm": 1.9350505234778406, "learning_rate": 5e-07, "loss": 1.0357, "step": 155 }, { "epoch": 0.02516940948693127, "grad_norm": 4.7505298118254125, "learning_rate": 5.032258064516128e-07, "loss": 0.7975, "step": 156 }, { "epoch": 0.025330751855437237, "grad_norm": 1.9505435821741317, "learning_rate": 5.064516129032258e-07, "loss": 0.7383, "step": 157 }, { "epoch": 0.025492094223943208, "grad_norm": 3.1460668380340304, "learning_rate": 5.096774193548386e-07, "loss": 0.9171, "step": 158 }, { "epoch": 0.025653436592449178, "grad_norm": 3.7616833836684456, "learning_rate": 5.129032258064516e-07, "loss": 1.0094, "step": 159 }, { "epoch": 0.025814778960955145, "grad_norm": 3.3922792960827732, "learning_rate": 5.161290322580645e-07, "loss": 0.9701, "step": 160 }, { "epoch": 0.025976121329461116, "grad_norm": 2.6353130295068876, "learning_rate": 5.193548387096775e-07, "loss": 0.9922, "step": 161 }, { "epoch": 0.026137463697967087, "grad_norm": 2.0760641563512965, "learning_rate": 5.225806451612903e-07, "loss": 0.8148, "step": 162 }, { "epoch": 0.026298806066473057, "grad_norm": 3.4435352403862844, "learning_rate": 5.258064516129032e-07, "loss": 1.2506, "step": 163 }, { "epoch": 0.026460148434979024, "grad_norm": 2.9222909559894132, "learning_rate": 5.290322580645161e-07, "loss": 1.0159, "step": 164 }, { "epoch": 0.026621490803484995, "grad_norm": 4.414658335753238, "learning_rate": 5.322580645161289e-07, "loss": 0.9918, "step": 165 }, { "epoch": 0.026782833171990966, "grad_norm": 1.5579707303511383, "learning_rate": 5.35483870967742e-07, "loss": 0.6064, "step": 166 }, { "epoch": 0.026944175540496936, "grad_norm": 2.6053579326894756, "learning_rate": 5.387096774193548e-07, "loss": 0.8325, "step": 167 }, { "epoch": 0.027105517909002903, "grad_norm": 2.810579364686907, "learning_rate": 5.419354838709678e-07, "loss": 0.9933, "step": 168 }, { "epoch": 0.027266860277508874, "grad_norm": 2.301013358411015, "learning_rate": 5.451612903225806e-07, "loss": 0.9007, "step": 169 }, { "epoch": 0.027428202646014845, "grad_norm": 1.981604015599457, "learning_rate": 5.483870967741935e-07, "loss": 0.9347, "step": 170 }, { "epoch": 0.02758954501452081, "grad_norm": 3.435145317235488, "learning_rate": 5.516129032258064e-07, "loss": 1.2029, "step": 171 }, { "epoch": 0.027750887383026782, "grad_norm": 2.6274341922725486, "learning_rate": 5.548387096774194e-07, "loss": 0.8358, "step": 172 }, { "epoch": 0.027912229751532753, "grad_norm": 2.1821924397951973, "learning_rate": 5.580645161290323e-07, "loss": 1.1253, "step": 173 }, { "epoch": 0.028073572120038724, "grad_norm": 2.1333971724296465, "learning_rate": 5.612903225806451e-07, "loss": 0.823, "step": 174 }, { "epoch": 0.02823491448854469, "grad_norm": 2.498655052804707, "learning_rate": 5.645161290322581e-07, "loss": 0.9799, "step": 175 }, { "epoch": 0.02839625685705066, "grad_norm": 2.1562339671547845, "learning_rate": 5.677419354838709e-07, "loss": 0.8249, "step": 176 }, { "epoch": 0.028557599225556632, "grad_norm": 2.3445343739565994, "learning_rate": 5.709677419354838e-07, "loss": 1.0831, "step": 177 }, { "epoch": 0.028718941594062602, "grad_norm": 2.4856100791078535, "learning_rate": 5.741935483870967e-07, "loss": 0.9914, "step": 178 }, { "epoch": 0.02888028396256857, "grad_norm": 2.1925431020886683, "learning_rate": 5.774193548387097e-07, "loss": 0.6721, "step": 179 }, { "epoch": 0.02904162633107454, "grad_norm": 3.237545063191221, "learning_rate": 5.806451612903226e-07, "loss": 1.0723, "step": 180 }, { "epoch": 0.02920296869958051, "grad_norm": 2.334969718785767, "learning_rate": 5.838709677419355e-07, "loss": 1.0558, "step": 181 }, { "epoch": 0.029364311068086478, "grad_norm": 4.823736968288528, "learning_rate": 5.870967741935483e-07, "loss": 0.8625, "step": 182 }, { "epoch": 0.02952565343659245, "grad_norm": 2.0869687849268455, "learning_rate": 5.903225806451612e-07, "loss": 0.7055, "step": 183 }, { "epoch": 0.02968699580509842, "grad_norm": 1.767961289832153, "learning_rate": 5.935483870967741e-07, "loss": 0.6819, "step": 184 }, { "epoch": 0.02984833817360439, "grad_norm": 5.619303552773501, "learning_rate": 5.967741935483871e-07, "loss": 0.732, "step": 185 }, { "epoch": 0.030009680542110357, "grad_norm": 2.262742728179822, "learning_rate": 6e-07, "loss": 0.9489, "step": 186 }, { "epoch": 0.030171022910616328, "grad_norm": 2.6189439032607233, "learning_rate": 6.032258064516129e-07, "loss": 0.7124, "step": 187 }, { "epoch": 0.030332365279122298, "grad_norm": 2.2701419396073796, "learning_rate": 6.064516129032258e-07, "loss": 1.1363, "step": 188 }, { "epoch": 0.03049370764762827, "grad_norm": 1.9569950062332804, "learning_rate": 6.096774193548386e-07, "loss": 0.6393, "step": 189 }, { "epoch": 0.030655050016134236, "grad_norm": 2.2150953038984085, "learning_rate": 6.129032258064516e-07, "loss": 0.812, "step": 190 }, { "epoch": 0.030816392384640207, "grad_norm": 2.201441886202408, "learning_rate": 6.161290322580645e-07, "loss": 1.0356, "step": 191 }, { "epoch": 0.030977734753146177, "grad_norm": 2.8606629519604785, "learning_rate": 6.193548387096774e-07, "loss": 1.0683, "step": 192 }, { "epoch": 0.031139077121652144, "grad_norm": 2.9818446432026415, "learning_rate": 6.225806451612903e-07, "loss": 0.8209, "step": 193 }, { "epoch": 0.03130041949015812, "grad_norm": 2.2553516956130237, "learning_rate": 6.258064516129032e-07, "loss": 0.8455, "step": 194 }, { "epoch": 0.03146176185866408, "grad_norm": 3.486670315402026, "learning_rate": 6.290322580645161e-07, "loss": 1.2474, "step": 195 }, { "epoch": 0.03162310422717005, "grad_norm": 2.801822059296277, "learning_rate": 6.322580645161289e-07, "loss": 0.8092, "step": 196 }, { "epoch": 0.03178444659567602, "grad_norm": 2.6467005792682383, "learning_rate": 6.35483870967742e-07, "loss": 0.9022, "step": 197 }, { "epoch": 0.031945788964181994, "grad_norm": 2.3950932451544804, "learning_rate": 6.387096774193548e-07, "loss": 0.7969, "step": 198 }, { "epoch": 0.032107131332687965, "grad_norm": 2.686593013571063, "learning_rate": 6.419354838709678e-07, "loss": 0.9398, "step": 199 }, { "epoch": 0.032268473701193935, "grad_norm": 2.28594883924424, "learning_rate": 6.451612903225806e-07, "loss": 1.0304, "step": 200 }, { "epoch": 0.032429816069699906, "grad_norm": 2.9360042578642944, "learning_rate": 6.483870967741935e-07, "loss": 1.2477, "step": 201 }, { "epoch": 0.03259115843820587, "grad_norm": 2.8326790746122468, "learning_rate": 6.516129032258064e-07, "loss": 0.8836, "step": 202 }, { "epoch": 0.03275250080671184, "grad_norm": 1.706709982103783, "learning_rate": 6.548387096774192e-07, "loss": 0.5708, "step": 203 }, { "epoch": 0.03291384317521781, "grad_norm": 2.190607425113111, "learning_rate": 6.580645161290323e-07, "loss": 0.8953, "step": 204 }, { "epoch": 0.03307518554372378, "grad_norm": 2.064179863433891, "learning_rate": 6.612903225806451e-07, "loss": 0.675, "step": 205 }, { "epoch": 0.03323652791222975, "grad_norm": 2.1798368013172906, "learning_rate": 6.645161290322581e-07, "loss": 0.9153, "step": 206 }, { "epoch": 0.03339787028073572, "grad_norm": 2.0929217621988516, "learning_rate": 6.677419354838709e-07, "loss": 0.7816, "step": 207 }, { "epoch": 0.03355921264924169, "grad_norm": 2.0189099416764726, "learning_rate": 6.709677419354839e-07, "loss": 1.055, "step": 208 }, { "epoch": 0.033720555017747664, "grad_norm": 5.267324654404034, "learning_rate": 6.741935483870967e-07, "loss": 0.9458, "step": 209 }, { "epoch": 0.03388189738625363, "grad_norm": 2.196016789203661, "learning_rate": 6.774193548387096e-07, "loss": 0.8492, "step": 210 }, { "epoch": 0.0340432397547596, "grad_norm": 2.5308706682214948, "learning_rate": 6.806451612903226e-07, "loss": 1.1794, "step": 211 }, { "epoch": 0.03420458212326557, "grad_norm": 2.322854855203985, "learning_rate": 6.838709677419354e-07, "loss": 0.7326, "step": 212 }, { "epoch": 0.03436592449177154, "grad_norm": 2.4898439108708175, "learning_rate": 6.870967741935484e-07, "loss": 0.8995, "step": 213 }, { "epoch": 0.03452726686027751, "grad_norm": 2.127056921465846, "learning_rate": 6.903225806451612e-07, "loss": 0.629, "step": 214 }, { "epoch": 0.03468860922878348, "grad_norm": 2.2819189893567247, "learning_rate": 6.935483870967742e-07, "loss": 0.8577, "step": 215 }, { "epoch": 0.03484995159728945, "grad_norm": 2.432441254834623, "learning_rate": 6.96774193548387e-07, "loss": 0.9638, "step": 216 }, { "epoch": 0.035011293965795415, "grad_norm": 2.0122903845003917, "learning_rate": 7e-07, "loss": 0.6343, "step": 217 }, { "epoch": 0.035172636334301385, "grad_norm": 2.8952149936255105, "learning_rate": 7.032258064516129e-07, "loss": 0.8572, "step": 218 }, { "epoch": 0.035333978702807356, "grad_norm": 2.0586896436097537, "learning_rate": 7.064516129032257e-07, "loss": 0.7967, "step": 219 }, { "epoch": 0.03549532107131333, "grad_norm": 1.9580299737679778, "learning_rate": 7.096774193548387e-07, "loss": 0.865, "step": 220 }, { "epoch": 0.0356566634398193, "grad_norm": 1.950850543348598, "learning_rate": 7.129032258064515e-07, "loss": 0.8049, "step": 221 }, { "epoch": 0.03581800580832527, "grad_norm": 2.0719790103173237, "learning_rate": 7.161290322580646e-07, "loss": 0.8767, "step": 222 }, { "epoch": 0.03597934817683124, "grad_norm": 1.781418173363369, "learning_rate": 7.193548387096774e-07, "loss": 0.7285, "step": 223 }, { "epoch": 0.0361406905453372, "grad_norm": 1.9864284073713037, "learning_rate": 7.225806451612903e-07, "loss": 0.9211, "step": 224 }, { "epoch": 0.03630203291384317, "grad_norm": 1.9919543559961583, "learning_rate": 7.258064516129032e-07, "loss": 0.9257, "step": 225 }, { "epoch": 0.03646337528234914, "grad_norm": 1.8964618716883876, "learning_rate": 7.290322580645161e-07, "loss": 0.7309, "step": 226 }, { "epoch": 0.036624717650855114, "grad_norm": 2.1739316574626626, "learning_rate": 7.32258064516129e-07, "loss": 0.8549, "step": 227 }, { "epoch": 0.036786060019361085, "grad_norm": 1.7882878654627594, "learning_rate": 7.354838709677418e-07, "loss": 0.8374, "step": 228 }, { "epoch": 0.036947402387867055, "grad_norm": 2.079336286619551, "learning_rate": 7.387096774193549e-07, "loss": 0.9454, "step": 229 }, { "epoch": 0.037108744756373026, "grad_norm": 2.034882334838598, "learning_rate": 7.419354838709677e-07, "loss": 0.7747, "step": 230 }, { "epoch": 0.037270087124878996, "grad_norm": 2.345699567577704, "learning_rate": 7.451612903225806e-07, "loss": 0.7367, "step": 231 }, { "epoch": 0.03743142949338496, "grad_norm": 1.9280029851803284, "learning_rate": 7.483870967741935e-07, "loss": 0.9219, "step": 232 }, { "epoch": 0.03759277186189093, "grad_norm": 1.460050997300652, "learning_rate": 7.516129032258064e-07, "loss": 0.7758, "step": 233 }, { "epoch": 0.0377541142303969, "grad_norm": 2.2437167916988363, "learning_rate": 7.548387096774193e-07, "loss": 0.8879, "step": 234 }, { "epoch": 0.03791545659890287, "grad_norm": 1.7759644238929653, "learning_rate": 7.580645161290323e-07, "loss": 0.6436, "step": 235 }, { "epoch": 0.03807679896740884, "grad_norm": 1.9296616154838149, "learning_rate": 7.612903225806451e-07, "loss": 0.5289, "step": 236 }, { "epoch": 0.03823814133591481, "grad_norm": 2.33150140056204, "learning_rate": 7.645161290322581e-07, "loss": 0.7616, "step": 237 }, { "epoch": 0.038399483704420784, "grad_norm": 2.003466106062408, "learning_rate": 7.677419354838709e-07, "loss": 0.7834, "step": 238 }, { "epoch": 0.03856082607292675, "grad_norm": 2.8563271209522214, "learning_rate": 7.709677419354838e-07, "loss": 0.9498, "step": 239 }, { "epoch": 0.03872216844143272, "grad_norm": 2.20641227752141, "learning_rate": 7.741935483870967e-07, "loss": 0.6893, "step": 240 }, { "epoch": 0.03888351080993869, "grad_norm": 2.039083312648213, "learning_rate": 7.774193548387097e-07, "loss": 0.6805, "step": 241 }, { "epoch": 0.03904485317844466, "grad_norm": 2.5293397642056212, "learning_rate": 7.806451612903226e-07, "loss": 0.9504, "step": 242 }, { "epoch": 0.03920619554695063, "grad_norm": 2.0484446556728684, "learning_rate": 7.838709677419354e-07, "loss": 0.8001, "step": 243 }, { "epoch": 0.0393675379154566, "grad_norm": 2.1675478414955736, "learning_rate": 7.870967741935484e-07, "loss": 0.8343, "step": 244 }, { "epoch": 0.03952888028396257, "grad_norm": 2.139603642630708, "learning_rate": 7.903225806451612e-07, "loss": 1.1367, "step": 245 }, { "epoch": 0.03969022265246854, "grad_norm": 2.3701403239110754, "learning_rate": 7.935483870967742e-07, "loss": 1.0214, "step": 246 }, { "epoch": 0.039851565020974505, "grad_norm": 1.7643116480243421, "learning_rate": 7.96774193548387e-07, "loss": 0.8868, "step": 247 }, { "epoch": 0.040012907389480476, "grad_norm": 2.4570063118207295, "learning_rate": 8e-07, "loss": 0.8207, "step": 248 }, { "epoch": 0.04017424975798645, "grad_norm": 1.8126200274483508, "learning_rate": 8.032258064516129e-07, "loss": 0.9299, "step": 249 }, { "epoch": 0.04033559212649242, "grad_norm": 1.6132980973885755, "learning_rate": 8.064516129032257e-07, "loss": 0.7825, "step": 250 }, { "epoch": 0.04049693449499839, "grad_norm": 4.63916526981253, "learning_rate": 8.096774193548387e-07, "loss": 0.7924, "step": 251 }, { "epoch": 0.04065827686350436, "grad_norm": 63.53797846195022, "learning_rate": 8.129032258064515e-07, "loss": 0.7374, "step": 252 }, { "epoch": 0.04081961923201033, "grad_norm": 4.38714505142815, "learning_rate": 8.161290322580645e-07, "loss": 0.7837, "step": 253 }, { "epoch": 0.04098096160051629, "grad_norm": 2.5208475622190645, "learning_rate": 8.193548387096774e-07, "loss": 0.8325, "step": 254 }, { "epoch": 0.04114230396902226, "grad_norm": 9.10577625097457, "learning_rate": 8.225806451612904e-07, "loss": 1.1135, "step": 255 }, { "epoch": 0.041303646337528234, "grad_norm": 2.230752101877965, "learning_rate": 8.258064516129032e-07, "loss": 0.6964, "step": 256 }, { "epoch": 0.041464988706034205, "grad_norm": 2.8775013532726508, "learning_rate": 8.29032258064516e-07, "loss": 0.8825, "step": 257 }, { "epoch": 0.041626331074540175, "grad_norm": 3.304515644772379, "learning_rate": 8.32258064516129e-07, "loss": 0.8939, "step": 258 }, { "epoch": 0.041787673443046146, "grad_norm": 3.4178395622907125, "learning_rate": 8.354838709677418e-07, "loss": 0.8368, "step": 259 }, { "epoch": 0.041949015811552116, "grad_norm": 2.6840262029878166, "learning_rate": 8.387096774193549e-07, "loss": 0.7118, "step": 260 }, { "epoch": 0.04211035818005808, "grad_norm": 2.2591907558003483, "learning_rate": 8.419354838709677e-07, "loss": 0.7121, "step": 261 }, { "epoch": 0.04227170054856405, "grad_norm": 2.0678373692092054, "learning_rate": 8.451612903225807e-07, "loss": 0.6893, "step": 262 }, { "epoch": 0.04243304291707002, "grad_norm": 12.344310583591929, "learning_rate": 8.483870967741935e-07, "loss": 0.8802, "step": 263 }, { "epoch": 0.04259438528557599, "grad_norm": 2.902402073722107, "learning_rate": 8.516129032258064e-07, "loss": 0.7886, "step": 264 }, { "epoch": 0.04275572765408196, "grad_norm": 2.2971280535957717, "learning_rate": 8.548387096774193e-07, "loss": 0.5957, "step": 265 }, { "epoch": 0.04291707002258793, "grad_norm": 2.690153630489074, "learning_rate": 8.580645161290321e-07, "loss": 0.9613, "step": 266 }, { "epoch": 0.043078412391093904, "grad_norm": 2.7634391120509934, "learning_rate": 8.612903225806452e-07, "loss": 0.7341, "step": 267 }, { "epoch": 0.043239754759599874, "grad_norm": 32.32932385101895, "learning_rate": 8.64516129032258e-07, "loss": 0.7924, "step": 268 }, { "epoch": 0.04340109712810584, "grad_norm": 2.222257179409158, "learning_rate": 8.67741935483871e-07, "loss": 0.7378, "step": 269 }, { "epoch": 0.04356243949661181, "grad_norm": 124.25375685970224, "learning_rate": 8.709677419354838e-07, "loss": 1.1549, "step": 270 }, { "epoch": 0.04372378186511778, "grad_norm": 3.9981808101878773, "learning_rate": 8.741935483870967e-07, "loss": 0.7621, "step": 271 }, { "epoch": 0.04388512423362375, "grad_norm": 550.3470495968885, "learning_rate": 8.774193548387096e-07, "loss": 1.9166, "step": 272 }, { "epoch": 0.04404646660212972, "grad_norm": 4.096261688113308, "learning_rate": 8.806451612903226e-07, "loss": 0.8609, "step": 273 }, { "epoch": 0.04420780897063569, "grad_norm": 5.251832188199449, "learning_rate": 8.838709677419355e-07, "loss": 0.7776, "step": 274 }, { "epoch": 0.04436915133914166, "grad_norm": 8.13920736141843, "learning_rate": 8.870967741935483e-07, "loss": 1.111, "step": 275 }, { "epoch": 0.044530493707647625, "grad_norm": 184.2673626486796, "learning_rate": 8.903225806451613e-07, "loss": 1.1943, "step": 276 }, { "epoch": 0.044691836076153596, "grad_norm": 3.091244231888526, "learning_rate": 8.935483870967741e-07, "loss": 0.9258, "step": 277 }, { "epoch": 0.04485317844465957, "grad_norm": 3.487349809810293, "learning_rate": 8.96774193548387e-07, "loss": 0.7713, "step": 278 }, { "epoch": 0.04501452081316554, "grad_norm": 2.9095763172682547, "learning_rate": 9e-07, "loss": 0.8677, "step": 279 }, { "epoch": 0.04517586318167151, "grad_norm": 5.545183122533174, "learning_rate": 9.032258064516129e-07, "loss": 0.9129, "step": 280 }, { "epoch": 0.04533720555017748, "grad_norm": 2.2566022434176474, "learning_rate": 9.064516129032258e-07, "loss": 0.696, "step": 281 }, { "epoch": 0.04549854791868345, "grad_norm": 3.19293646855623, "learning_rate": 9.096774193548387e-07, "loss": 0.7739, "step": 282 }, { "epoch": 0.04565989028718941, "grad_norm": 13.242474992270079, "learning_rate": 9.129032258064516e-07, "loss": 1.2064, "step": 283 }, { "epoch": 0.04582123265569538, "grad_norm": 3.159760288852275, "learning_rate": 9.161290322580644e-07, "loss": 0.8169, "step": 284 }, { "epoch": 0.045982575024201354, "grad_norm": 4.061951820362027, "learning_rate": 9.193548387096774e-07, "loss": 0.7972, "step": 285 }, { "epoch": 0.046143917392707325, "grad_norm": 2.3114437062123154, "learning_rate": 9.225806451612903e-07, "loss": 0.6283, "step": 286 }, { "epoch": 0.046305259761213295, "grad_norm": 13.981924104362612, "learning_rate": 9.258064516129032e-07, "loss": 0.665, "step": 287 }, { "epoch": 0.046466602129719266, "grad_norm": 3.0044152829330666, "learning_rate": 9.290322580645161e-07, "loss": 0.7552, "step": 288 }, { "epoch": 0.046627944498225236, "grad_norm": 5.938214068389711, "learning_rate": 9.32258064516129e-07, "loss": 0.9882, "step": 289 }, { "epoch": 0.04678928686673121, "grad_norm": 2.68417551989212, "learning_rate": 9.354838709677418e-07, "loss": 0.5813, "step": 290 }, { "epoch": 0.04695062923523717, "grad_norm": 1.908106353398766, "learning_rate": 9.387096774193549e-07, "loss": 0.6553, "step": 291 }, { "epoch": 0.04711197160374314, "grad_norm": 2.497176101347307, "learning_rate": 9.419354838709677e-07, "loss": 0.8253, "step": 292 }, { "epoch": 0.04727331397224911, "grad_norm": 3.385949352122196, "learning_rate": 9.451612903225806e-07, "loss": 0.8106, "step": 293 }, { "epoch": 0.04743465634075508, "grad_norm": 2.7811533021725983, "learning_rate": 9.483870967741935e-07, "loss": 0.6888, "step": 294 }, { "epoch": 0.04759599870926105, "grad_norm": 9.208711181989626, "learning_rate": 9.516129032258064e-07, "loss": 1.1734, "step": 295 }, { "epoch": 0.047757341077767024, "grad_norm": 5.661524215731352, "learning_rate": 9.548387096774193e-07, "loss": 0.866, "step": 296 }, { "epoch": 0.047918683446272994, "grad_norm": 3.1408739892749447, "learning_rate": 9.580645161290321e-07, "loss": 1.0734, "step": 297 }, { "epoch": 0.04808002581477896, "grad_norm": 2.5160974094535056, "learning_rate": 9.612903225806452e-07, "loss": 0.7273, "step": 298 }, { "epoch": 0.04824136818328493, "grad_norm": 3.06547837524054, "learning_rate": 9.64516129032258e-07, "loss": 0.7652, "step": 299 }, { "epoch": 0.0484027105517909, "grad_norm": 2.496298147318869, "learning_rate": 9.67741935483871e-07, "loss": 0.8945, "step": 300 }, { "epoch": 0.04856405292029687, "grad_norm": 5.407399138110925, "learning_rate": 9.709677419354838e-07, "loss": 0.7604, "step": 301 }, { "epoch": 0.04872539528880284, "grad_norm": 11.665381079187094, "learning_rate": 9.741935483870968e-07, "loss": 0.8971, "step": 302 }, { "epoch": 0.04888673765730881, "grad_norm": 2.791310833320591, "learning_rate": 9.774193548387096e-07, "loss": 0.9404, "step": 303 }, { "epoch": 0.04904808002581478, "grad_norm": 4.3806941443010015, "learning_rate": 9.806451612903225e-07, "loss": 0.6338, "step": 304 }, { "epoch": 0.049209422394320745, "grad_norm": 2.9754780874059237, "learning_rate": 9.838709677419355e-07, "loss": 0.9071, "step": 305 }, { "epoch": 0.049370764762826716, "grad_norm": 3.7276741465076455, "learning_rate": 9.870967741935483e-07, "loss": 0.7439, "step": 306 }, { "epoch": 0.04953210713133269, "grad_norm": 2.4991923935106506, "learning_rate": 9.903225806451613e-07, "loss": 0.7325, "step": 307 }, { "epoch": 0.04969344949983866, "grad_norm": 2.1716939727206324, "learning_rate": 9.935483870967741e-07, "loss": 0.8379, "step": 308 }, { "epoch": 0.04985479186834463, "grad_norm": 1.9290163672322869, "learning_rate": 9.967741935483871e-07, "loss": 0.684, "step": 309 }, { "epoch": 0.0500161342368506, "grad_norm": 2.9254423996974808, "learning_rate": 1e-06, "loss": 0.8637, "step": 310 }, { "epoch": 0.05017747660535657, "grad_norm": 2.085689801428876, "learning_rate": 9.999999288288242e-07, "loss": 0.8183, "step": 311 }, { "epoch": 0.05033881897386254, "grad_norm": 2.5666487998670817, "learning_rate": 9.999997153153168e-07, "loss": 0.9378, "step": 312 }, { "epoch": 0.0505001613423685, "grad_norm": 2.3984839220555827, "learning_rate": 9.99999359459539e-07, "loss": 0.8485, "step": 313 }, { "epoch": 0.050661503710874474, "grad_norm": 3.4882757821205748, "learning_rate": 9.999988612615915e-07, "loss": 0.8127, "step": 314 }, { "epoch": 0.050822846079380445, "grad_norm": 3.4380972603471305, "learning_rate": 9.999982207216167e-07, "loss": 0.9265, "step": 315 }, { "epoch": 0.050984188447886415, "grad_norm": 2.618610871836785, "learning_rate": 9.999974378397966e-07, "loss": 0.8993, "step": 316 }, { "epoch": 0.051145530816392386, "grad_norm": 2.8531727046258304, "learning_rate": 9.999965126163541e-07, "loss": 0.8761, "step": 317 }, { "epoch": 0.051306873184898356, "grad_norm": 2.4730672630297548, "learning_rate": 9.999954450515528e-07, "loss": 0.5698, "step": 318 }, { "epoch": 0.05146821555340433, "grad_norm": 1.8786022551026034, "learning_rate": 9.999942351456968e-07, "loss": 0.6653, "step": 319 }, { "epoch": 0.05162955792191029, "grad_norm": 2.642975071812939, "learning_rate": 9.999928828991297e-07, "loss": 0.8709, "step": 320 }, { "epoch": 0.05179090029041626, "grad_norm": 2.955792871984638, "learning_rate": 9.999913883122376e-07, "loss": 0.6951, "step": 321 }, { "epoch": 0.05195224265892223, "grad_norm": 2.4836424701300293, "learning_rate": 9.99989751385445e-07, "loss": 0.8685, "step": 322 }, { "epoch": 0.0521135850274282, "grad_norm": 5.5847477426140575, "learning_rate": 9.999879721192184e-07, "loss": 0.5852, "step": 323 }, { "epoch": 0.05227492739593417, "grad_norm": 2.564022170359296, "learning_rate": 9.999860505140643e-07, "loss": 0.8553, "step": 324 }, { "epoch": 0.052436269764440144, "grad_norm": 2.382920459740516, "learning_rate": 9.999839865705295e-07, "loss": 0.6368, "step": 325 }, { "epoch": 0.052597612132946114, "grad_norm": 2.3362869460611932, "learning_rate": 9.99981780289202e-07, "loss": 0.7127, "step": 326 }, { "epoch": 0.05275895450145208, "grad_norm": 2.02815186375488, "learning_rate": 9.999794316707095e-07, "loss": 0.7709, "step": 327 }, { "epoch": 0.05292029686995805, "grad_norm": 2.1858649546519535, "learning_rate": 9.999769407157208e-07, "loss": 0.7775, "step": 328 }, { "epoch": 0.05308163923846402, "grad_norm": 2.5605787192313323, "learning_rate": 9.999743074249452e-07, "loss": 0.7173, "step": 329 }, { "epoch": 0.05324298160696999, "grad_norm": 2.6352352235678533, "learning_rate": 9.999715317991319e-07, "loss": 0.7511, "step": 330 }, { "epoch": 0.05340432397547596, "grad_norm": 2.2318213749032645, "learning_rate": 9.999686138390714e-07, "loss": 0.5739, "step": 331 }, { "epoch": 0.05356566634398193, "grad_norm": 3.8598790631101303, "learning_rate": 9.999655535455945e-07, "loss": 0.9988, "step": 332 }, { "epoch": 0.0537270087124879, "grad_norm": 2.720994045151514, "learning_rate": 9.999623509195722e-07, "loss": 0.9349, "step": 333 }, { "epoch": 0.05388835108099387, "grad_norm": 2.268406705919261, "learning_rate": 9.999590059619162e-07, "loss": 0.7443, "step": 334 }, { "epoch": 0.054049693449499836, "grad_norm": 2.6854825098820823, "learning_rate": 9.99955518673579e-07, "loss": 0.6933, "step": 335 }, { "epoch": 0.05421103581800581, "grad_norm": 2.1428126830075485, "learning_rate": 9.999518890555531e-07, "loss": 0.812, "step": 336 }, { "epoch": 0.05437237818651178, "grad_norm": 2.379497886712887, "learning_rate": 9.999481171088722e-07, "loss": 0.7211, "step": 337 }, { "epoch": 0.05453372055501775, "grad_norm": 2.2248817926788926, "learning_rate": 9.999442028346097e-07, "loss": 0.7276, "step": 338 }, { "epoch": 0.05469506292352372, "grad_norm": 1.8325785976275462, "learning_rate": 9.999401462338799e-07, "loss": 0.7321, "step": 339 }, { "epoch": 0.05485640529202969, "grad_norm": 1.801505714706009, "learning_rate": 9.999359473078383e-07, "loss": 0.6057, "step": 340 }, { "epoch": 0.05501774766053566, "grad_norm": 4.263915673143147, "learning_rate": 9.999316060576792e-07, "loss": 0.9751, "step": 341 }, { "epoch": 0.05517909002904162, "grad_norm": 3.502106305254485, "learning_rate": 9.999271224846395e-07, "loss": 0.921, "step": 342 }, { "epoch": 0.055340432397547594, "grad_norm": 2.0533258255701576, "learning_rate": 9.999224965899951e-07, "loss": 0.8179, "step": 343 }, { "epoch": 0.055501774766053565, "grad_norm": 2.2932841979167238, "learning_rate": 9.99917728375063e-07, "loss": 0.5532, "step": 344 }, { "epoch": 0.055663117134559535, "grad_norm": 2.844325919049513, "learning_rate": 9.999128178412007e-07, "loss": 0.7234, "step": 345 }, { "epoch": 0.055824459503065506, "grad_norm": 2.196119926999582, "learning_rate": 9.99907764989806e-07, "loss": 0.6305, "step": 346 }, { "epoch": 0.055985801871571476, "grad_norm": 2.5372322880879117, "learning_rate": 9.999025698223176e-07, "loss": 0.7915, "step": 347 }, { "epoch": 0.05614714424007745, "grad_norm": 3.6022158214295983, "learning_rate": 9.998972323402145e-07, "loss": 1.0068, "step": 348 }, { "epoch": 0.05630848660858341, "grad_norm": 2.943350302739834, "learning_rate": 9.998917525450156e-07, "loss": 0.5541, "step": 349 }, { "epoch": 0.05646982897708938, "grad_norm": 2.401489105790808, "learning_rate": 9.998861304382818e-07, "loss": 0.9578, "step": 350 }, { "epoch": 0.05663117134559535, "grad_norm": 4.124509146413975, "learning_rate": 9.99880366021613e-07, "loss": 0.7374, "step": 351 }, { "epoch": 0.05679251371410132, "grad_norm": 12.133162749492156, "learning_rate": 9.998744592966506e-07, "loss": 0.6796, "step": 352 }, { "epoch": 0.05695385608260729, "grad_norm": 1.8528394199216578, "learning_rate": 9.998684102650761e-07, "loss": 0.5698, "step": 353 }, { "epoch": 0.057115198451113264, "grad_norm": 2.3902368168039905, "learning_rate": 9.998622189286112e-07, "loss": 0.6389, "step": 354 }, { "epoch": 0.057276540819619234, "grad_norm": 2.103224540270695, "learning_rate": 9.99855885289019e-07, "loss": 0.6197, "step": 355 }, { "epoch": 0.057437883188125205, "grad_norm": 1.8695695438759232, "learning_rate": 9.99849409348102e-07, "loss": 0.8821, "step": 356 }, { "epoch": 0.05759922555663117, "grad_norm": 2.065125153007199, "learning_rate": 9.998427911077045e-07, "loss": 0.6302, "step": 357 }, { "epoch": 0.05776056792513714, "grad_norm": 5.369695308004512, "learning_rate": 9.9983603056971e-07, "loss": 0.5915, "step": 358 }, { "epoch": 0.05792191029364311, "grad_norm": 3.077398533949636, "learning_rate": 9.998291277360435e-07, "loss": 0.701, "step": 359 }, { "epoch": 0.05808325266214908, "grad_norm": 1.9469432200424057, "learning_rate": 9.9982208260867e-07, "loss": 0.7705, "step": 360 }, { "epoch": 0.05824459503065505, "grad_norm": 2.415494623650814, "learning_rate": 9.99814895189595e-07, "loss": 0.8986, "step": 361 }, { "epoch": 0.05840593739916102, "grad_norm": 2.3559695291928864, "learning_rate": 9.99807565480865e-07, "loss": 0.742, "step": 362 }, { "epoch": 0.05856727976766699, "grad_norm": 2.2724812912039716, "learning_rate": 9.998000934845663e-07, "loss": 0.6664, "step": 363 }, { "epoch": 0.058728622136172956, "grad_norm": 2.704993776183804, "learning_rate": 9.997924792028264e-07, "loss": 0.7397, "step": 364 }, { "epoch": 0.05888996450467893, "grad_norm": 3.8723622850590886, "learning_rate": 9.997847226378126e-07, "loss": 0.6861, "step": 365 }, { "epoch": 0.0590513068731849, "grad_norm": 2.2460876995502748, "learning_rate": 9.997768237917332e-07, "loss": 0.6632, "step": 366 }, { "epoch": 0.05921264924169087, "grad_norm": 2.3043010840318505, "learning_rate": 9.99768782666837e-07, "loss": 0.7292, "step": 367 }, { "epoch": 0.05937399161019684, "grad_norm": 2.350119823079413, "learning_rate": 9.997605992654134e-07, "loss": 0.6929, "step": 368 }, { "epoch": 0.05953533397870281, "grad_norm": 2.3424258751474514, "learning_rate": 9.997522735897915e-07, "loss": 0.7901, "step": 369 }, { "epoch": 0.05969667634720878, "grad_norm": 2.2924230078827756, "learning_rate": 9.99743805642342e-07, "loss": 0.4803, "step": 370 }, { "epoch": 0.05985801871571474, "grad_norm": 2.4438364857543156, "learning_rate": 9.99735195425475e-07, "loss": 0.8299, "step": 371 }, { "epoch": 0.060019361084220714, "grad_norm": 2.2611694723101414, "learning_rate": 9.997264429416426e-07, "loss": 0.9006, "step": 372 }, { "epoch": 0.060180703452726685, "grad_norm": 2.263465325556983, "learning_rate": 9.997175481933358e-07, "loss": 0.6222, "step": 373 }, { "epoch": 0.060342045821232655, "grad_norm": 1.9430420276182134, "learning_rate": 9.99708511183087e-07, "loss": 0.7868, "step": 374 }, { "epoch": 0.060503388189738626, "grad_norm": 2.887750959843854, "learning_rate": 9.996993319134689e-07, "loss": 0.943, "step": 375 }, { "epoch": 0.060664730558244596, "grad_norm": 2.572284996753413, "learning_rate": 9.996900103870946e-07, "loss": 0.7313, "step": 376 }, { "epoch": 0.06082607292675057, "grad_norm": 2.5528957604876887, "learning_rate": 9.99680546606618e-07, "loss": 0.732, "step": 377 }, { "epoch": 0.06098741529525654, "grad_norm": 1.8210673588461004, "learning_rate": 9.996709405747332e-07, "loss": 0.6915, "step": 378 }, { "epoch": 0.0611487576637625, "grad_norm": 2.3816181128615592, "learning_rate": 9.996611922941747e-07, "loss": 0.6117, "step": 379 }, { "epoch": 0.06131010003226847, "grad_norm": 2.334344122706704, "learning_rate": 9.99651301767718e-07, "loss": 0.8217, "step": 380 }, { "epoch": 0.06147144240077444, "grad_norm": 5.954409002769765, "learning_rate": 9.996412689981785e-07, "loss": 0.5872, "step": 381 }, { "epoch": 0.06163278476928041, "grad_norm": 1.9363468183885277, "learning_rate": 9.996310939884127e-07, "loss": 0.4901, "step": 382 }, { "epoch": 0.061794127137786384, "grad_norm": 2.4314334398118134, "learning_rate": 9.99620776741317e-07, "loss": 0.7396, "step": 383 }, { "epoch": 0.061955469506292354, "grad_norm": 2.4105742600560203, "learning_rate": 9.996103172598286e-07, "loss": 0.7803, "step": 384 }, { "epoch": 0.062116811874798325, "grad_norm": 2.3394615103125, "learning_rate": 9.995997155469254e-07, "loss": 0.6462, "step": 385 }, { "epoch": 0.06227815424330429, "grad_norm": 2.0300554724569575, "learning_rate": 9.99588971605625e-07, "loss": 0.625, "step": 386 }, { "epoch": 0.06243949661181026, "grad_norm": 2.417602807179556, "learning_rate": 9.995780854389866e-07, "loss": 0.8082, "step": 387 }, { "epoch": 0.06260083898031624, "grad_norm": 1.9359216105969008, "learning_rate": 9.995670570501092e-07, "loss": 0.7773, "step": 388 }, { "epoch": 0.06276218134882221, "grad_norm": 2.239342349786182, "learning_rate": 9.995558864421321e-07, "loss": 0.5554, "step": 389 }, { "epoch": 0.06292352371732816, "grad_norm": 2.5043507864276346, "learning_rate": 9.995445736182357e-07, "loss": 0.7247, "step": 390 }, { "epoch": 0.06308486608583413, "grad_norm": 5.084860504547594, "learning_rate": 9.995331185816406e-07, "loss": 0.8388, "step": 391 }, { "epoch": 0.0632462084543401, "grad_norm": 2.603956066516887, "learning_rate": 9.995215213356075e-07, "loss": 0.7686, "step": 392 }, { "epoch": 0.06340755082284608, "grad_norm": 3.1530695839464906, "learning_rate": 9.995097818834386e-07, "loss": 0.5957, "step": 393 }, { "epoch": 0.06356889319135205, "grad_norm": 2.3443211177531356, "learning_rate": 9.994979002284756e-07, "loss": 0.7734, "step": 394 }, { "epoch": 0.06373023555985802, "grad_norm": 1.7615168216799757, "learning_rate": 9.994858763741007e-07, "loss": 0.6841, "step": 395 }, { "epoch": 0.06389157792836399, "grad_norm": 2.7477969101344595, "learning_rate": 9.994737103237373e-07, "loss": 0.8333, "step": 396 }, { "epoch": 0.06405292029686996, "grad_norm": 1.5282310776664652, "learning_rate": 9.99461402080849e-07, "loss": 0.4746, "step": 397 }, { "epoch": 0.06421426266537593, "grad_norm": 1.9862499240421498, "learning_rate": 9.994489516489396e-07, "loss": 0.8222, "step": 398 }, { "epoch": 0.0643756050338819, "grad_norm": 1.9567176430520954, "learning_rate": 9.994363590315532e-07, "loss": 0.595, "step": 399 }, { "epoch": 0.06453694740238787, "grad_norm": 2.7868070485226997, "learning_rate": 9.994236242322753e-07, "loss": 1.1215, "step": 400 }, { "epoch": 0.06469828977089384, "grad_norm": 1.9158751332204635, "learning_rate": 9.99410747254731e-07, "loss": 0.5412, "step": 401 }, { "epoch": 0.06485963213939981, "grad_norm": 2.1783668591760983, "learning_rate": 9.993977281025862e-07, "loss": 0.5621, "step": 402 }, { "epoch": 0.06502097450790578, "grad_norm": 2.0918360830242384, "learning_rate": 9.993845667795473e-07, "loss": 0.6364, "step": 403 }, { "epoch": 0.06518231687641174, "grad_norm": 2.65284672548215, "learning_rate": 9.99371263289361e-07, "loss": 0.8144, "step": 404 }, { "epoch": 0.06534365924491771, "grad_norm": 1.890853741892794, "learning_rate": 9.993578176358148e-07, "loss": 0.6143, "step": 405 }, { "epoch": 0.06550500161342368, "grad_norm": 1.83364289733528, "learning_rate": 9.993442298227364e-07, "loss": 0.4661, "step": 406 }, { "epoch": 0.06566634398192965, "grad_norm": 6.607856016811808, "learning_rate": 9.99330499853994e-07, "loss": 0.8585, "step": 407 }, { "epoch": 0.06582768635043562, "grad_norm": 2.9357101181843985, "learning_rate": 9.993166277334964e-07, "loss": 0.5685, "step": 408 }, { "epoch": 0.06598902871894159, "grad_norm": 2.5805878868399974, "learning_rate": 9.993026134651926e-07, "loss": 0.9096, "step": 409 }, { "epoch": 0.06615037108744756, "grad_norm": 1.8911961725193027, "learning_rate": 9.992884570530724e-07, "loss": 0.6407, "step": 410 }, { "epoch": 0.06631171345595353, "grad_norm": 2.143328664763702, "learning_rate": 9.992741585011659e-07, "loss": 0.6939, "step": 411 }, { "epoch": 0.0664730558244595, "grad_norm": 2.37530676215841, "learning_rate": 9.992597178135438e-07, "loss": 0.6429, "step": 412 }, { "epoch": 0.06663439819296547, "grad_norm": 2.716248600235713, "learning_rate": 9.992451349943167e-07, "loss": 0.5322, "step": 413 }, { "epoch": 0.06679574056147145, "grad_norm": 2.1653539887102387, "learning_rate": 9.992304100476366e-07, "loss": 0.8854, "step": 414 }, { "epoch": 0.06695708292997742, "grad_norm": 2.378228853742618, "learning_rate": 9.992155429776953e-07, "loss": 0.6553, "step": 415 }, { "epoch": 0.06711842529848339, "grad_norm": 2.4362550148048676, "learning_rate": 9.992005337887251e-07, "loss": 0.6901, "step": 416 }, { "epoch": 0.06727976766698936, "grad_norm": 2.851906617186021, "learning_rate": 9.99185382484999e-07, "loss": 0.6163, "step": 417 }, { "epoch": 0.06744111003549533, "grad_norm": 2.924625672735326, "learning_rate": 9.991700890708305e-07, "loss": 0.5692, "step": 418 }, { "epoch": 0.06760245240400128, "grad_norm": 2.041482475951111, "learning_rate": 9.991546535505732e-07, "loss": 0.7349, "step": 419 }, { "epoch": 0.06776379477250725, "grad_norm": 2.2895285486731125, "learning_rate": 9.991390759286211e-07, "loss": 0.6958, "step": 420 }, { "epoch": 0.06792513714101323, "grad_norm": 1.9608793212041504, "learning_rate": 9.991233562094095e-07, "loss": 0.7258, "step": 421 }, { "epoch": 0.0680864795095192, "grad_norm": 2.218623735300583, "learning_rate": 9.99107494397413e-07, "loss": 0.972, "step": 422 }, { "epoch": 0.06824782187802517, "grad_norm": 2.629151104751534, "learning_rate": 9.990914904971477e-07, "loss": 0.7645, "step": 423 }, { "epoch": 0.06840916424653114, "grad_norm": 2.2613812921217322, "learning_rate": 9.990753445131694e-07, "loss": 1.0693, "step": 424 }, { "epoch": 0.06857050661503711, "grad_norm": 1.8410481845656446, "learning_rate": 9.990590564500746e-07, "loss": 0.611, "step": 425 }, { "epoch": 0.06873184898354308, "grad_norm": 2.26614279256196, "learning_rate": 9.990426263125002e-07, "loss": 0.5427, "step": 426 }, { "epoch": 0.06889319135204905, "grad_norm": 2.0073600288895603, "learning_rate": 9.99026054105124e-07, "loss": 0.8333, "step": 427 }, { "epoch": 0.06905453372055502, "grad_norm": 1.8257397471890517, "learning_rate": 9.990093398326634e-07, "loss": 0.671, "step": 428 }, { "epoch": 0.06921587608906099, "grad_norm": 1.8555255439498775, "learning_rate": 9.98992483499877e-07, "loss": 0.7553, "step": 429 }, { "epoch": 0.06937721845756696, "grad_norm": 2.065077933441085, "learning_rate": 9.989754851115633e-07, "loss": 0.8387, "step": 430 }, { "epoch": 0.06953856082607293, "grad_norm": 3.1115048269129892, "learning_rate": 9.989583446725617e-07, "loss": 0.7566, "step": 431 }, { "epoch": 0.0696999031945789, "grad_norm": 1.8237166765500896, "learning_rate": 9.989410621877516e-07, "loss": 0.5643, "step": 432 }, { "epoch": 0.06986124556308487, "grad_norm": 17.89248676857613, "learning_rate": 9.989236376620534e-07, "loss": 0.6675, "step": 433 }, { "epoch": 0.07002258793159083, "grad_norm": 2.3643614767133045, "learning_rate": 9.989060711004272e-07, "loss": 0.553, "step": 434 }, { "epoch": 0.0701839303000968, "grad_norm": 2.352513016635343, "learning_rate": 9.988883625078742e-07, "loss": 0.7793, "step": 435 }, { "epoch": 0.07034527266860277, "grad_norm": 1.7132028710509797, "learning_rate": 9.988705118894356e-07, "loss": 0.7545, "step": 436 }, { "epoch": 0.07050661503710874, "grad_norm": 2.2884296675166063, "learning_rate": 9.988525192501933e-07, "loss": 0.7134, "step": 437 }, { "epoch": 0.07066795740561471, "grad_norm": 2.436691247822164, "learning_rate": 9.988343845952696e-07, "loss": 0.5931, "step": 438 }, { "epoch": 0.07082929977412068, "grad_norm": 2.2554248473886283, "learning_rate": 9.98816107929827e-07, "loss": 0.6486, "step": 439 }, { "epoch": 0.07099064214262665, "grad_norm": 2.0525951102333737, "learning_rate": 9.987976892590687e-07, "loss": 0.7271, "step": 440 }, { "epoch": 0.07115198451113262, "grad_norm": 1.9271243597938774, "learning_rate": 9.987791285882379e-07, "loss": 0.7083, "step": 441 }, { "epoch": 0.0713133268796386, "grad_norm": 1.8947885240301907, "learning_rate": 9.98760425922619e-07, "loss": 0.7427, "step": 442 }, { "epoch": 0.07147466924814457, "grad_norm": 2.14442451920199, "learning_rate": 9.987415812675362e-07, "loss": 0.785, "step": 443 }, { "epoch": 0.07163601161665054, "grad_norm": 1.7231203176429952, "learning_rate": 9.98722594628354e-07, "loss": 0.6916, "step": 444 }, { "epoch": 0.0717973539851565, "grad_norm": 1.9676510604781483, "learning_rate": 9.987034660104784e-07, "loss": 0.6869, "step": 445 }, { "epoch": 0.07195869635366248, "grad_norm": 2.787403200803809, "learning_rate": 9.98684195419354e-07, "loss": 0.549, "step": 446 }, { "epoch": 0.07212003872216845, "grad_norm": 3.7256628586013942, "learning_rate": 9.986647828604673e-07, "loss": 0.9989, "step": 447 }, { "epoch": 0.0722813810906744, "grad_norm": 2.496236447835705, "learning_rate": 9.986452283393451e-07, "loss": 0.6833, "step": 448 }, { "epoch": 0.07244272345918037, "grad_norm": 1.8941484516821505, "learning_rate": 9.986255318615537e-07, "loss": 0.771, "step": 449 }, { "epoch": 0.07260406582768635, "grad_norm": 2.0939277459606753, "learning_rate": 9.986056934327007e-07, "loss": 0.6575, "step": 450 }, { "epoch": 0.07276540819619232, "grad_norm": 2.4055179621886746, "learning_rate": 9.985857130584338e-07, "loss": 0.7733, "step": 451 }, { "epoch": 0.07292675056469829, "grad_norm": 2.3100826928996865, "learning_rate": 9.98565590744441e-07, "loss": 0.7107, "step": 452 }, { "epoch": 0.07308809293320426, "grad_norm": 2.1307826181323293, "learning_rate": 9.98545326496451e-07, "loss": 0.8103, "step": 453 }, { "epoch": 0.07324943530171023, "grad_norm": 1.8697031225067913, "learning_rate": 9.985249203202325e-07, "loss": 0.8655, "step": 454 }, { "epoch": 0.0734107776702162, "grad_norm": 2.6617429597255784, "learning_rate": 9.985043722215949e-07, "loss": 0.8068, "step": 455 }, { "epoch": 0.07357212003872217, "grad_norm": 2.310519943015852, "learning_rate": 9.984836822063878e-07, "loss": 0.6041, "step": 456 }, { "epoch": 0.07373346240722814, "grad_norm": 2.004645793002248, "learning_rate": 9.984628502805015e-07, "loss": 0.6679, "step": 457 }, { "epoch": 0.07389480477573411, "grad_norm": 1.9742585878990904, "learning_rate": 9.984418764498665e-07, "loss": 0.5406, "step": 458 }, { "epoch": 0.07405614714424008, "grad_norm": 9.019565507865863, "learning_rate": 9.984207607204538e-07, "loss": 0.8499, "step": 459 }, { "epoch": 0.07421748951274605, "grad_norm": 2.4088307202546995, "learning_rate": 9.983995030982747e-07, "loss": 0.766, "step": 460 }, { "epoch": 0.07437883188125202, "grad_norm": 2.4357127949135178, "learning_rate": 9.983781035893807e-07, "loss": 0.8901, "step": 461 }, { "epoch": 0.07454017424975799, "grad_norm": 2.5428373944509155, "learning_rate": 9.983565621998642e-07, "loss": 1.0172, "step": 462 }, { "epoch": 0.07470151661826395, "grad_norm": 1.6134856974296168, "learning_rate": 9.983348789358576e-07, "loss": 0.5959, "step": 463 }, { "epoch": 0.07486285898676992, "grad_norm": 1.8913534353225783, "learning_rate": 9.983130538035338e-07, "loss": 0.7434, "step": 464 }, { "epoch": 0.07502420135527589, "grad_norm": 1.8037770935054058, "learning_rate": 9.98291086809106e-07, "loss": 0.7308, "step": 465 }, { "epoch": 0.07518554372378186, "grad_norm": 1.9029603933922483, "learning_rate": 9.98268977958828e-07, "loss": 0.6171, "step": 466 }, { "epoch": 0.07534688609228783, "grad_norm": 1.8471313763351134, "learning_rate": 9.98246727258994e-07, "loss": 0.8618, "step": 467 }, { "epoch": 0.0755082284607938, "grad_norm": 2.3781852443849085, "learning_rate": 9.982243347159379e-07, "loss": 0.7592, "step": 468 }, { "epoch": 0.07566957082929977, "grad_norm": 2.2345776232631587, "learning_rate": 9.982018003360347e-07, "loss": 0.7718, "step": 469 }, { "epoch": 0.07583091319780574, "grad_norm": 2.901117201711507, "learning_rate": 9.981791241257e-07, "loss": 0.7478, "step": 470 }, { "epoch": 0.07599225556631171, "grad_norm": 1.8673679791298152, "learning_rate": 9.981563060913889e-07, "loss": 0.6816, "step": 471 }, { "epoch": 0.07615359793481769, "grad_norm": 2.1595772765796752, "learning_rate": 9.981333462395977e-07, "loss": 0.5582, "step": 472 }, { "epoch": 0.07631494030332366, "grad_norm": 2.1734030857282267, "learning_rate": 9.981102445768626e-07, "loss": 0.8138, "step": 473 }, { "epoch": 0.07647628267182963, "grad_norm": 2.2651729790527075, "learning_rate": 9.9808700110976e-07, "loss": 0.7673, "step": 474 }, { "epoch": 0.0766376250403356, "grad_norm": 6.907352687439786, "learning_rate": 9.980636158449074e-07, "loss": 0.794, "step": 475 }, { "epoch": 0.07679896740884157, "grad_norm": 2.3634237955783775, "learning_rate": 9.98040088788962e-07, "loss": 0.6928, "step": 476 }, { "epoch": 0.07696030977734754, "grad_norm": 2.136129629316397, "learning_rate": 9.980164199486216e-07, "loss": 0.7305, "step": 477 }, { "epoch": 0.0771216521458535, "grad_norm": 2.1081901330779043, "learning_rate": 9.979926093306245e-07, "loss": 0.5295, "step": 478 }, { "epoch": 0.07728299451435947, "grad_norm": 2.492500596424211, "learning_rate": 9.979686569417488e-07, "loss": 0.789, "step": 479 }, { "epoch": 0.07744433688286544, "grad_norm": 1.983376920311952, "learning_rate": 9.97944562788814e-07, "loss": 0.6997, "step": 480 }, { "epoch": 0.0776056792513714, "grad_norm": 2.429228908031511, "learning_rate": 9.979203268786788e-07, "loss": 0.8969, "step": 481 }, { "epoch": 0.07776702161987738, "grad_norm": 2.0934756654391493, "learning_rate": 9.978959492182434e-07, "loss": 0.6231, "step": 482 }, { "epoch": 0.07792836398838335, "grad_norm": 2.1401837861831767, "learning_rate": 9.97871429814447e-07, "loss": 0.6462, "step": 483 }, { "epoch": 0.07808970635688932, "grad_norm": 2.088182819642651, "learning_rate": 9.978467686742702e-07, "loss": 0.5475, "step": 484 }, { "epoch": 0.07825104872539529, "grad_norm": 2.2764146326096495, "learning_rate": 9.97821965804734e-07, "loss": 0.7644, "step": 485 }, { "epoch": 0.07841239109390126, "grad_norm": 1.8791421912630084, "learning_rate": 9.977970212128989e-07, "loss": 0.5726, "step": 486 }, { "epoch": 0.07857373346240723, "grad_norm": 2.0267478934540937, "learning_rate": 9.977719349058664e-07, "loss": 0.7881, "step": 487 }, { "epoch": 0.0787350758309132, "grad_norm": 3.4713450854049834, "learning_rate": 9.977467068907784e-07, "loss": 0.6217, "step": 488 }, { "epoch": 0.07889641819941917, "grad_norm": 1.9289029646297393, "learning_rate": 9.977213371748166e-07, "loss": 0.635, "step": 489 }, { "epoch": 0.07905776056792514, "grad_norm": 1.836047230140045, "learning_rate": 9.976958257652036e-07, "loss": 0.6698, "step": 490 }, { "epoch": 0.07921910293643111, "grad_norm": 2.245633232341554, "learning_rate": 9.976701726692022e-07, "loss": 0.8313, "step": 491 }, { "epoch": 0.07938044530493708, "grad_norm": 2.341081040574546, "learning_rate": 9.976443778941152e-07, "loss": 0.8301, "step": 492 }, { "epoch": 0.07954178767344304, "grad_norm": 2.0277618976592477, "learning_rate": 9.97618441447286e-07, "loss": 0.7233, "step": 493 }, { "epoch": 0.07970313004194901, "grad_norm": 2.2012455275499354, "learning_rate": 9.975923633360984e-07, "loss": 0.5201, "step": 494 }, { "epoch": 0.07986447241045498, "grad_norm": 1.726832096921308, "learning_rate": 9.975661435679763e-07, "loss": 0.6281, "step": 495 }, { "epoch": 0.08002581477896095, "grad_norm": 1.6269307770402033, "learning_rate": 9.975397821503844e-07, "loss": 0.5429, "step": 496 }, { "epoch": 0.08018715714746692, "grad_norm": 2.8127813410444964, "learning_rate": 9.97513279090827e-07, "loss": 0.9178, "step": 497 }, { "epoch": 0.0803484995159729, "grad_norm": 2.512185250679999, "learning_rate": 9.974866343968495e-07, "loss": 0.7122, "step": 498 }, { "epoch": 0.08050984188447886, "grad_norm": 2.052450492531304, "learning_rate": 9.97459848076037e-07, "loss": 0.6358, "step": 499 }, { "epoch": 0.08067118425298483, "grad_norm": 1.8085636935609959, "learning_rate": 9.97432920136015e-07, "loss": 0.6843, "step": 500 }, { "epoch": 0.0808325266214908, "grad_norm": 1.7615391540333774, "learning_rate": 9.974058505844498e-07, "loss": 0.4955, "step": 501 }, { "epoch": 0.08099386898999678, "grad_norm": 2.078387365773236, "learning_rate": 9.973786394290473e-07, "loss": 0.776, "step": 502 }, { "epoch": 0.08115521135850275, "grad_norm": 2.200842032982896, "learning_rate": 9.973512866775547e-07, "loss": 0.5794, "step": 503 }, { "epoch": 0.08131655372700872, "grad_norm": 1.5703752799318915, "learning_rate": 9.973237923377583e-07, "loss": 0.6859, "step": 504 }, { "epoch": 0.08147789609551469, "grad_norm": 2.232758691031229, "learning_rate": 9.972961564174856e-07, "loss": 0.7641, "step": 505 }, { "epoch": 0.08163923846402066, "grad_norm": 1.8677605423985284, "learning_rate": 9.97268378924604e-07, "loss": 0.6623, "step": 506 }, { "epoch": 0.08180058083252661, "grad_norm": 1.6705264062469967, "learning_rate": 9.972404598670217e-07, "loss": 0.6058, "step": 507 }, { "epoch": 0.08196192320103259, "grad_norm": 1.636560327697093, "learning_rate": 9.972123992526864e-07, "loss": 0.6407, "step": 508 }, { "epoch": 0.08212326556953856, "grad_norm": 3.4015246170590894, "learning_rate": 9.971841970895868e-07, "loss": 0.5055, "step": 509 }, { "epoch": 0.08228460793804453, "grad_norm": 1.7471794149791346, "learning_rate": 9.971558533857513e-07, "loss": 0.6791, "step": 510 }, { "epoch": 0.0824459503065505, "grad_norm": 1.8398743321171145, "learning_rate": 9.971273681492492e-07, "loss": 0.5939, "step": 511 }, { "epoch": 0.08260729267505647, "grad_norm": 1.7967166333277722, "learning_rate": 9.970987413881896e-07, "loss": 0.5002, "step": 512 }, { "epoch": 0.08276863504356244, "grad_norm": 1.807813109296401, "learning_rate": 9.970699731107224e-07, "loss": 0.8346, "step": 513 }, { "epoch": 0.08292997741206841, "grad_norm": 1.9998742898534085, "learning_rate": 9.970410633250374e-07, "loss": 0.7127, "step": 514 }, { "epoch": 0.08309131978057438, "grad_norm": 2.2445581112564517, "learning_rate": 9.970120120393645e-07, "loss": 0.8387, "step": 515 }, { "epoch": 0.08325266214908035, "grad_norm": 1.9270256927392782, "learning_rate": 9.969828192619746e-07, "loss": 0.8313, "step": 516 }, { "epoch": 0.08341400451758632, "grad_norm": 2.0250040124924076, "learning_rate": 9.96953485001178e-07, "loss": 0.6566, "step": 517 }, { "epoch": 0.08357534688609229, "grad_norm": 1.9343472332417466, "learning_rate": 9.96924009265326e-07, "loss": 0.7823, "step": 518 }, { "epoch": 0.08373668925459826, "grad_norm": 2.215008011366433, "learning_rate": 9.968943920628097e-07, "loss": 0.6139, "step": 519 }, { "epoch": 0.08389803162310423, "grad_norm": 2.9364174004487515, "learning_rate": 9.968646334020608e-07, "loss": 0.9492, "step": 520 }, { "epoch": 0.0840593739916102, "grad_norm": 1.769730108341967, "learning_rate": 9.968347332915509e-07, "loss": 0.627, "step": 521 }, { "epoch": 0.08422071636011616, "grad_norm": 1.9778057304769152, "learning_rate": 9.968046917397925e-07, "loss": 0.609, "step": 522 }, { "epoch": 0.08438205872862213, "grad_norm": 2.609961438095112, "learning_rate": 9.967745087553378e-07, "loss": 0.6486, "step": 523 }, { "epoch": 0.0845434010971281, "grad_norm": 2.2541592619580997, "learning_rate": 9.967441843467794e-07, "loss": 0.6656, "step": 524 }, { "epoch": 0.08470474346563407, "grad_norm": 1.516049355158938, "learning_rate": 9.967137185227501e-07, "loss": 0.6813, "step": 525 }, { "epoch": 0.08486608583414004, "grad_norm": 2.2036408165484125, "learning_rate": 9.966831112919233e-07, "loss": 0.6044, "step": 526 }, { "epoch": 0.08502742820264601, "grad_norm": 1.4927705754012206, "learning_rate": 9.966523626630122e-07, "loss": 0.6248, "step": 527 }, { "epoch": 0.08518877057115198, "grad_norm": 2.4125676634791984, "learning_rate": 9.966214726447707e-07, "loss": 0.9311, "step": 528 }, { "epoch": 0.08535011293965795, "grad_norm": 1.5669627734973832, "learning_rate": 9.965904412459922e-07, "loss": 0.4938, "step": 529 }, { "epoch": 0.08551145530816393, "grad_norm": 1.7293396445776588, "learning_rate": 9.965592684755113e-07, "loss": 0.4265, "step": 530 }, { "epoch": 0.0856727976766699, "grad_norm": 5.758525664470824, "learning_rate": 9.965279543422025e-07, "loss": 0.8384, "step": 531 }, { "epoch": 0.08583414004517587, "grad_norm": 1.9963679713179874, "learning_rate": 9.9649649885498e-07, "loss": 0.6208, "step": 532 }, { "epoch": 0.08599548241368184, "grad_norm": 1.9227435002686442, "learning_rate": 9.96464902022799e-07, "loss": 0.625, "step": 533 }, { "epoch": 0.08615682478218781, "grad_norm": 1.544710745292171, "learning_rate": 9.96433163854655e-07, "loss": 0.523, "step": 534 }, { "epoch": 0.08631816715069378, "grad_norm": 1.5075759306806336, "learning_rate": 9.964012843595825e-07, "loss": 0.7002, "step": 535 }, { "epoch": 0.08647950951919975, "grad_norm": 1.68625715193423, "learning_rate": 9.963692635466578e-07, "loss": 0.6094, "step": 536 }, { "epoch": 0.0866408518877057, "grad_norm": 2.529783601213694, "learning_rate": 9.963371014249966e-07, "loss": 0.7771, "step": 537 }, { "epoch": 0.08680219425621168, "grad_norm": 4.262552013807006, "learning_rate": 9.963047980037548e-07, "loss": 0.918, "step": 538 }, { "epoch": 0.08696353662471765, "grad_norm": 2.2539247614531743, "learning_rate": 9.962723532921286e-07, "loss": 0.7945, "step": 539 }, { "epoch": 0.08712487899322362, "grad_norm": 1.8478487007289104, "learning_rate": 9.96239767299355e-07, "loss": 0.5754, "step": 540 }, { "epoch": 0.08728622136172959, "grad_norm": 2.1399314480791265, "learning_rate": 9.962070400347101e-07, "loss": 0.6225, "step": 541 }, { "epoch": 0.08744756373023556, "grad_norm": 1.7123408723951976, "learning_rate": 9.961741715075113e-07, "loss": 0.4594, "step": 542 }, { "epoch": 0.08760890609874153, "grad_norm": 2.1315243362658918, "learning_rate": 9.961411617271157e-07, "loss": 0.6957, "step": 543 }, { "epoch": 0.0877702484672475, "grad_norm": 2.303259869667702, "learning_rate": 9.961080107029208e-07, "loss": 0.8219, "step": 544 }, { "epoch": 0.08793159083575347, "grad_norm": 2.034963294783103, "learning_rate": 9.960747184443637e-07, "loss": 0.7262, "step": 545 }, { "epoch": 0.08809293320425944, "grad_norm": 2.466030699564352, "learning_rate": 9.960412849609226e-07, "loss": 0.7758, "step": 546 }, { "epoch": 0.08825427557276541, "grad_norm": 2.1494246001204096, "learning_rate": 9.960077102621153e-07, "loss": 0.6244, "step": 547 }, { "epoch": 0.08841561794127138, "grad_norm": 1.9490632815081932, "learning_rate": 9.959739943575002e-07, "loss": 0.7824, "step": 548 }, { "epoch": 0.08857696030977735, "grad_norm": 2.3201208581356707, "learning_rate": 9.959401372566756e-07, "loss": 0.6135, "step": 549 }, { "epoch": 0.08873830267828332, "grad_norm": 1.901406560567554, "learning_rate": 9.9590613896928e-07, "loss": 0.6035, "step": 550 }, { "epoch": 0.08889964504678928, "grad_norm": 2.8704202082510557, "learning_rate": 9.958719995049924e-07, "loss": 0.8466, "step": 551 }, { "epoch": 0.08906098741529525, "grad_norm": 2.2647058135047944, "learning_rate": 9.958377188735318e-07, "loss": 0.8655, "step": 552 }, { "epoch": 0.08922232978380122, "grad_norm": 2.163044493818555, "learning_rate": 9.95803297084657e-07, "loss": 0.6337, "step": 553 }, { "epoch": 0.08938367215230719, "grad_norm": 2.3039306432364657, "learning_rate": 9.957687341481678e-07, "loss": 0.6757, "step": 554 }, { "epoch": 0.08954501452081316, "grad_norm": 1.8637164425403319, "learning_rate": 9.957340300739035e-07, "loss": 0.5805, "step": 555 }, { "epoch": 0.08970635688931913, "grad_norm": 1.6062689872836609, "learning_rate": 9.95699184871744e-07, "loss": 0.5911, "step": 556 }, { "epoch": 0.0898676992578251, "grad_norm": 2.1064714581348514, "learning_rate": 9.956641985516088e-07, "loss": 0.8287, "step": 557 }, { "epoch": 0.09002904162633107, "grad_norm": 1.7019509985041297, "learning_rate": 9.956290711234584e-07, "loss": 0.6475, "step": 558 }, { "epoch": 0.09019038399483705, "grad_norm": 1.8457757198851859, "learning_rate": 9.955938025972927e-07, "loss": 0.4362, "step": 559 }, { "epoch": 0.09035172636334302, "grad_norm": 1.6228195380093704, "learning_rate": 9.955583929831525e-07, "loss": 0.6508, "step": 560 }, { "epoch": 0.09051306873184899, "grad_norm": 1.448221032881361, "learning_rate": 9.955228422911181e-07, "loss": 0.5707, "step": 561 }, { "epoch": 0.09067441110035496, "grad_norm": 1.9265140895531245, "learning_rate": 9.954871505313103e-07, "loss": 0.5886, "step": 562 }, { "epoch": 0.09083575346886093, "grad_norm": 2.203015480295338, "learning_rate": 9.9545131771389e-07, "loss": 0.84, "step": 563 }, { "epoch": 0.0909970958373669, "grad_norm": 2.9904169566339616, "learning_rate": 9.954153438490583e-07, "loss": 0.6542, "step": 564 }, { "epoch": 0.09115843820587287, "grad_norm": 1.4022498139183885, "learning_rate": 9.953792289470562e-07, "loss": 0.491, "step": 565 }, { "epoch": 0.09131978057437883, "grad_norm": 2.1551937888828374, "learning_rate": 9.953429730181652e-07, "loss": 0.827, "step": 566 }, { "epoch": 0.0914811229428848, "grad_norm": 2.084446063109969, "learning_rate": 9.953065760727071e-07, "loss": 0.6906, "step": 567 }, { "epoch": 0.09164246531139077, "grad_norm": 1.69120311618507, "learning_rate": 9.95270038121043e-07, "loss": 0.5837, "step": 568 }, { "epoch": 0.09180380767989674, "grad_norm": 2.7414257589164057, "learning_rate": 9.952333591735753e-07, "loss": 0.8833, "step": 569 }, { "epoch": 0.09196515004840271, "grad_norm": 1.740065646741902, "learning_rate": 9.951965392407452e-07, "loss": 0.7191, "step": 570 }, { "epoch": 0.09212649241690868, "grad_norm": 1.8041449057284975, "learning_rate": 9.951595783330355e-07, "loss": 0.4837, "step": 571 }, { "epoch": 0.09228783478541465, "grad_norm": 1.8189920375790212, "learning_rate": 9.95122476460968e-07, "loss": 0.6087, "step": 572 }, { "epoch": 0.09244917715392062, "grad_norm": 1.7892014228821966, "learning_rate": 9.950852336351052e-07, "loss": 0.569, "step": 573 }, { "epoch": 0.09261051952242659, "grad_norm": 1.8555461024172373, "learning_rate": 9.950478498660495e-07, "loss": 0.5974, "step": 574 }, { "epoch": 0.09277186189093256, "grad_norm": 1.5957448106454566, "learning_rate": 9.950103251644435e-07, "loss": 0.6493, "step": 575 }, { "epoch": 0.09293320425943853, "grad_norm": 2.0357437896502617, "learning_rate": 9.949726595409697e-07, "loss": 0.7471, "step": 576 }, { "epoch": 0.0930945466279445, "grad_norm": 1.586075574755522, "learning_rate": 9.949348530063514e-07, "loss": 0.5989, "step": 577 }, { "epoch": 0.09325588899645047, "grad_norm": 2.1899741213937363, "learning_rate": 9.94896905571351e-07, "loss": 0.6582, "step": 578 }, { "epoch": 0.09341723136495644, "grad_norm": 2.6410362329738333, "learning_rate": 9.948588172467718e-07, "loss": 0.8495, "step": 579 }, { "epoch": 0.09357857373346241, "grad_norm": 1.8076654610146514, "learning_rate": 9.94820588043457e-07, "loss": 0.5789, "step": 580 }, { "epoch": 0.09373991610196837, "grad_norm": 1.9716882729119047, "learning_rate": 9.947822179722898e-07, "loss": 0.6754, "step": 581 }, { "epoch": 0.09390125847047434, "grad_norm": 1.8638332357055525, "learning_rate": 9.947437070441938e-07, "loss": 0.7662, "step": 582 }, { "epoch": 0.09406260083898031, "grad_norm": 2.170046798447534, "learning_rate": 9.947050552701321e-07, "loss": 0.6622, "step": 583 }, { "epoch": 0.09422394320748628, "grad_norm": 1.6117666306736003, "learning_rate": 9.946662626611086e-07, "loss": 0.5384, "step": 584 }, { "epoch": 0.09438528557599225, "grad_norm": 1.8627307006507872, "learning_rate": 9.946273292281669e-07, "loss": 0.5442, "step": 585 }, { "epoch": 0.09454662794449822, "grad_norm": 3.5481612104241673, "learning_rate": 9.945882549823904e-07, "loss": 0.8401, "step": 586 }, { "epoch": 0.0947079703130042, "grad_norm": 1.9961311948963072, "learning_rate": 9.945490399349034e-07, "loss": 0.5944, "step": 587 }, { "epoch": 0.09486931268151017, "grad_norm": 2.552494612789914, "learning_rate": 9.945096840968696e-07, "loss": 0.725, "step": 588 }, { "epoch": 0.09503065505001614, "grad_norm": 1.5238291701543294, "learning_rate": 9.94470187479493e-07, "loss": 0.5556, "step": 589 }, { "epoch": 0.0951919974185221, "grad_norm": 1.8256973057956585, "learning_rate": 9.944305500940177e-07, "loss": 0.5846, "step": 590 }, { "epoch": 0.09535333978702808, "grad_norm": 1.9816799936683263, "learning_rate": 9.94390771951728e-07, "loss": 0.5727, "step": 591 }, { "epoch": 0.09551468215553405, "grad_norm": 2.130133766666216, "learning_rate": 9.94350853063948e-07, "loss": 0.6859, "step": 592 }, { "epoch": 0.09567602452404002, "grad_norm": 3.006040373093883, "learning_rate": 9.943107934420418e-07, "loss": 0.5148, "step": 593 }, { "epoch": 0.09583736689254599, "grad_norm": 2.2110894356543245, "learning_rate": 9.942705930974142e-07, "loss": 0.8688, "step": 594 }, { "epoch": 0.09599870926105195, "grad_norm": 2.0729107369445128, "learning_rate": 9.942302520415093e-07, "loss": 0.5298, "step": 595 }, { "epoch": 0.09616005162955792, "grad_norm": 2.7166819983519743, "learning_rate": 9.941897702858117e-07, "loss": 0.5585, "step": 596 }, { "epoch": 0.09632139399806389, "grad_norm": 1.8726595576594889, "learning_rate": 9.941491478418457e-07, "loss": 0.7615, "step": 597 }, { "epoch": 0.09648273636656986, "grad_norm": 2.507930479512855, "learning_rate": 9.941083847211764e-07, "loss": 0.7469, "step": 598 }, { "epoch": 0.09664407873507583, "grad_norm": 1.5813703498726732, "learning_rate": 9.940674809354078e-07, "loss": 0.5914, "step": 599 }, { "epoch": 0.0968054211035818, "grad_norm": 1.4029711326734382, "learning_rate": 9.940264364961852e-07, "loss": 0.5182, "step": 600 }, { "epoch": 0.09696676347208777, "grad_norm": 1.6542403786840503, "learning_rate": 9.93985251415193e-07, "loss": 0.7542, "step": 601 }, { "epoch": 0.09712810584059374, "grad_norm": 1.7842200430746693, "learning_rate": 9.939439257041562e-07, "loss": 0.596, "step": 602 }, { "epoch": 0.09728944820909971, "grad_norm": 1.5779823012090717, "learning_rate": 9.93902459374839e-07, "loss": 0.5131, "step": 603 }, { "epoch": 0.09745079057760568, "grad_norm": 2.301875270457152, "learning_rate": 9.938608524390468e-07, "loss": 0.5217, "step": 604 }, { "epoch": 0.09761213294611165, "grad_norm": 2.4468227999637833, "learning_rate": 9.938191049086243e-07, "loss": 0.4829, "step": 605 }, { "epoch": 0.09777347531461762, "grad_norm": 3.644656458234077, "learning_rate": 9.937772167954563e-07, "loss": 0.5768, "step": 606 }, { "epoch": 0.09793481768312359, "grad_norm": 1.9743107570860405, "learning_rate": 9.93735188111468e-07, "loss": 0.6746, "step": 607 }, { "epoch": 0.09809616005162956, "grad_norm": 1.872972473003353, "learning_rate": 9.936930188686238e-07, "loss": 0.5577, "step": 608 }, { "epoch": 0.09825750242013553, "grad_norm": 2.2493903075786688, "learning_rate": 9.936507090789292e-07, "loss": 0.8104, "step": 609 }, { "epoch": 0.09841884478864149, "grad_norm": 2.021430120799719, "learning_rate": 9.936082587544287e-07, "loss": 0.6419, "step": 610 }, { "epoch": 0.09858018715714746, "grad_norm": 2.030277547607372, "learning_rate": 9.935656679072076e-07, "loss": 0.7257, "step": 611 }, { "epoch": 0.09874152952565343, "grad_norm": 1.4008788211539274, "learning_rate": 9.935229365493905e-07, "loss": 0.5448, "step": 612 }, { "epoch": 0.0989028718941594, "grad_norm": 2.1193835141262074, "learning_rate": 9.934800646931427e-07, "loss": 0.6912, "step": 613 }, { "epoch": 0.09906421426266537, "grad_norm": 2.345222010890438, "learning_rate": 9.93437052350669e-07, "loss": 0.7389, "step": 614 }, { "epoch": 0.09922555663117134, "grad_norm": 2.1854450928251805, "learning_rate": 9.933938995342145e-07, "loss": 0.6871, "step": 615 }, { "epoch": 0.09938689899967731, "grad_norm": 2.120879385497527, "learning_rate": 9.933506062560638e-07, "loss": 0.5694, "step": 616 }, { "epoch": 0.09954824136818329, "grad_norm": 2.26920945320413, "learning_rate": 9.933071725285423e-07, "loss": 0.5116, "step": 617 }, { "epoch": 0.09970958373668926, "grad_norm": 1.64979374492309, "learning_rate": 9.932635983640147e-07, "loss": 0.6529, "step": 618 }, { "epoch": 0.09987092610519523, "grad_norm": 2.643701690881763, "learning_rate": 9.932198837748857e-07, "loss": 0.5884, "step": 619 }, { "epoch": 0.1000322684737012, "grad_norm": 1.550857811102084, "learning_rate": 9.931760287736006e-07, "loss": 0.631, "step": 620 }, { "epoch": 0.10019361084220717, "grad_norm": 1.8465719448796403, "learning_rate": 9.931320333726438e-07, "loss": 0.8661, "step": 621 }, { "epoch": 0.10035495321071314, "grad_norm": 1.6690578791031745, "learning_rate": 9.930878975845405e-07, "loss": 0.5199, "step": 622 }, { "epoch": 0.10051629557921911, "grad_norm": 2.0971001539712244, "learning_rate": 9.930436214218552e-07, "loss": 0.6209, "step": 623 }, { "epoch": 0.10067763794772508, "grad_norm": 7.22848264186274, "learning_rate": 9.929992048971927e-07, "loss": 0.737, "step": 624 }, { "epoch": 0.10083898031623104, "grad_norm": 1.6487815461745123, "learning_rate": 9.929546480231978e-07, "loss": 0.4113, "step": 625 }, { "epoch": 0.101000322684737, "grad_norm": 1.9630551397359364, "learning_rate": 9.929099508125553e-07, "loss": 0.4548, "step": 626 }, { "epoch": 0.10116166505324298, "grad_norm": 2.657842629882542, "learning_rate": 9.928651132779895e-07, "loss": 0.5978, "step": 627 }, { "epoch": 0.10132300742174895, "grad_norm": 5.539872512452217, "learning_rate": 9.92820135432265e-07, "loss": 0.6071, "step": 628 }, { "epoch": 0.10148434979025492, "grad_norm": 2.3149627768240797, "learning_rate": 9.927750172881866e-07, "loss": 0.7391, "step": 629 }, { "epoch": 0.10164569215876089, "grad_norm": 1.5628085785381605, "learning_rate": 9.927297588585983e-07, "loss": 0.5604, "step": 630 }, { "epoch": 0.10180703452726686, "grad_norm": 2.0720834894702493, "learning_rate": 9.92684360156385e-07, "loss": 0.7563, "step": 631 }, { "epoch": 0.10196837689577283, "grad_norm": 1.7865428923084212, "learning_rate": 9.926388211944704e-07, "loss": 0.5974, "step": 632 }, { "epoch": 0.1021297192642788, "grad_norm": 1.9879184834137436, "learning_rate": 9.925931419858195e-07, "loss": 0.7011, "step": 633 }, { "epoch": 0.10229106163278477, "grad_norm": 2.0376597289213723, "learning_rate": 9.92547322543436e-07, "loss": 0.5394, "step": 634 }, { "epoch": 0.10245240400129074, "grad_norm": 2.0157232925933726, "learning_rate": 9.925013628803638e-07, "loss": 0.7386, "step": 635 }, { "epoch": 0.10261374636979671, "grad_norm": 1.560112540636115, "learning_rate": 9.924552630096874e-07, "loss": 0.5426, "step": 636 }, { "epoch": 0.10277508873830268, "grad_norm": 1.724509490113374, "learning_rate": 9.924090229445304e-07, "loss": 0.6548, "step": 637 }, { "epoch": 0.10293643110680865, "grad_norm": 2.3136562858905214, "learning_rate": 9.923626426980566e-07, "loss": 0.6762, "step": 638 }, { "epoch": 0.10309777347531461, "grad_norm": 2.35016851835496, "learning_rate": 9.9231612228347e-07, "loss": 0.626, "step": 639 }, { "epoch": 0.10325911584382058, "grad_norm": 1.8082484663402119, "learning_rate": 9.922694617140141e-07, "loss": 0.5987, "step": 640 }, { "epoch": 0.10342045821232655, "grad_norm": 1.8597016889017717, "learning_rate": 9.922226610029726e-07, "loss": 0.5561, "step": 641 }, { "epoch": 0.10358180058083252, "grad_norm": 1.8647513358298928, "learning_rate": 9.921757201636688e-07, "loss": 0.5572, "step": 642 }, { "epoch": 0.1037431429493385, "grad_norm": 1.8382025848171275, "learning_rate": 9.921286392094662e-07, "loss": 0.6225, "step": 643 }, { "epoch": 0.10390448531784446, "grad_norm": 2.53216536183515, "learning_rate": 9.920814181537675e-07, "loss": 0.6099, "step": 644 }, { "epoch": 0.10406582768635043, "grad_norm": 2.657954039664989, "learning_rate": 9.920340570100166e-07, "loss": 0.7117, "step": 645 }, { "epoch": 0.1042271700548564, "grad_norm": 2.7093276815011307, "learning_rate": 9.919865557916959e-07, "loss": 0.7419, "step": 646 }, { "epoch": 0.10438851242336238, "grad_norm": 1.894292442765602, "learning_rate": 9.919389145123284e-07, "loss": 0.7147, "step": 647 }, { "epoch": 0.10454985479186835, "grad_norm": 1.9031729956777579, "learning_rate": 9.91891133185477e-07, "loss": 0.6418, "step": 648 }, { "epoch": 0.10471119716037432, "grad_norm": 2.109083028295795, "learning_rate": 9.918432118247445e-07, "loss": 0.6565, "step": 649 }, { "epoch": 0.10487253952888029, "grad_norm": 2.087250599839013, "learning_rate": 9.917951504437728e-07, "loss": 0.7204, "step": 650 }, { "epoch": 0.10503388189738626, "grad_norm": 1.8118528986751044, "learning_rate": 9.917469490562446e-07, "loss": 0.4855, "step": 651 }, { "epoch": 0.10519522426589223, "grad_norm": 2.0230053767963936, "learning_rate": 9.91698607675882e-07, "loss": 0.8368, "step": 652 }, { "epoch": 0.1053565666343982, "grad_norm": 2.1011061616101068, "learning_rate": 9.916501263164472e-07, "loss": 0.5607, "step": 653 }, { "epoch": 0.10551790900290416, "grad_norm": 1.9961971248725632, "learning_rate": 9.91601504991742e-07, "loss": 0.6773, "step": 654 }, { "epoch": 0.10567925137141013, "grad_norm": 1.6368054933709357, "learning_rate": 9.915527437156081e-07, "loss": 0.5078, "step": 655 }, { "epoch": 0.1058405937399161, "grad_norm": 1.9023178067484714, "learning_rate": 9.91503842501927e-07, "loss": 0.4261, "step": 656 }, { "epoch": 0.10600193610842207, "grad_norm": 1.673956386499355, "learning_rate": 9.914548013646206e-07, "loss": 0.6268, "step": 657 }, { "epoch": 0.10616327847692804, "grad_norm": 1.8633884103217873, "learning_rate": 9.914056203176496e-07, "loss": 0.3441, "step": 658 }, { "epoch": 0.10632462084543401, "grad_norm": 3.4091665398271256, "learning_rate": 9.913562993750153e-07, "loss": 0.6048, "step": 659 }, { "epoch": 0.10648596321393998, "grad_norm": 3.7186953756984735, "learning_rate": 9.913068385507588e-07, "loss": 0.8377, "step": 660 }, { "epoch": 0.10664730558244595, "grad_norm": 2.2261128289417305, "learning_rate": 9.912572378589606e-07, "loss": 0.5819, "step": 661 }, { "epoch": 0.10680864795095192, "grad_norm": 1.3493622969099677, "learning_rate": 9.912074973137411e-07, "loss": 0.5285, "step": 662 }, { "epoch": 0.10696999031945789, "grad_norm": 2.2728962991456347, "learning_rate": 9.911576169292613e-07, "loss": 0.8464, "step": 663 }, { "epoch": 0.10713133268796386, "grad_norm": 1.741438837375421, "learning_rate": 9.911075967197207e-07, "loss": 0.6839, "step": 664 }, { "epoch": 0.10729267505646983, "grad_norm": 1.7321486746868706, "learning_rate": 9.910574366993596e-07, "loss": 0.483, "step": 665 }, { "epoch": 0.1074540174249758, "grad_norm": 2.040977192932124, "learning_rate": 9.91007136882458e-07, "loss": 0.5864, "step": 666 }, { "epoch": 0.10761535979348177, "grad_norm": 2.4646859821301845, "learning_rate": 9.90956697283335e-07, "loss": 0.6862, "step": 667 }, { "epoch": 0.10777670216198774, "grad_norm": 2.1443810471149547, "learning_rate": 9.909061179163503e-07, "loss": 0.8057, "step": 668 }, { "epoch": 0.1079380445304937, "grad_norm": 1.6771931424729243, "learning_rate": 9.90855398795903e-07, "loss": 0.6739, "step": 669 }, { "epoch": 0.10809938689899967, "grad_norm": 1.896607258485869, "learning_rate": 9.908045399364321e-07, "loss": 0.5409, "step": 670 }, { "epoch": 0.10826072926750564, "grad_norm": 2.580532545085909, "learning_rate": 9.907535413524164e-07, "loss": 0.8194, "step": 671 }, { "epoch": 0.10842207163601161, "grad_norm": 1.8858583751346956, "learning_rate": 9.907024030583742e-07, "loss": 0.5721, "step": 672 }, { "epoch": 0.10858341400451758, "grad_norm": 1.635144334540989, "learning_rate": 9.906511250688641e-07, "loss": 0.4397, "step": 673 }, { "epoch": 0.10874475637302355, "grad_norm": 1.7663092173633623, "learning_rate": 9.905997073984837e-07, "loss": 0.4425, "step": 674 }, { "epoch": 0.10890609874152953, "grad_norm": 1.9696190747506437, "learning_rate": 9.905481500618711e-07, "loss": 0.7649, "step": 675 }, { "epoch": 0.1090674411100355, "grad_norm": 2.420260771265719, "learning_rate": 9.904964530737042e-07, "loss": 0.98, "step": 676 }, { "epoch": 0.10922878347854147, "grad_norm": 3.1827217639692074, "learning_rate": 9.904446164486998e-07, "loss": 0.6496, "step": 677 }, { "epoch": 0.10939012584704744, "grad_norm": 1.5548847782585944, "learning_rate": 9.90392640201615e-07, "loss": 0.5164, "step": 678 }, { "epoch": 0.10955146821555341, "grad_norm": 2.1167609992245544, "learning_rate": 9.903405243472474e-07, "loss": 0.5573, "step": 679 }, { "epoch": 0.10971281058405938, "grad_norm": 2.5217685908637018, "learning_rate": 9.902882689004325e-07, "loss": 0.7969, "step": 680 }, { "epoch": 0.10987415295256535, "grad_norm": 1.7946387930080483, "learning_rate": 9.902358738760475e-07, "loss": 0.5862, "step": 681 }, { "epoch": 0.11003549532107132, "grad_norm": 1.7301514207580853, "learning_rate": 9.901833392890081e-07, "loss": 0.5651, "step": 682 }, { "epoch": 0.11019683768957728, "grad_norm": 1.4624461987372561, "learning_rate": 9.901306651542701e-07, "loss": 0.6045, "step": 683 }, { "epoch": 0.11035818005808325, "grad_norm": 1.479355043406384, "learning_rate": 9.90077851486829e-07, "loss": 0.5922, "step": 684 }, { "epoch": 0.11051952242658922, "grad_norm": 2.128254473944664, "learning_rate": 9.9002489830172e-07, "loss": 0.7567, "step": 685 }, { "epoch": 0.11068086479509519, "grad_norm": 1.5252984833576444, "learning_rate": 9.899718056140186e-07, "loss": 0.5636, "step": 686 }, { "epoch": 0.11084220716360116, "grad_norm": 2.1901629996061365, "learning_rate": 9.899185734388386e-07, "loss": 0.8295, "step": 687 }, { "epoch": 0.11100354953210713, "grad_norm": 2.329324400536205, "learning_rate": 9.89865201791335e-07, "loss": 0.6194, "step": 688 }, { "epoch": 0.1111648919006131, "grad_norm": 1.4480071645580523, "learning_rate": 9.898116906867016e-07, "loss": 0.613, "step": 689 }, { "epoch": 0.11132623426911907, "grad_norm": 1.7163594353689453, "learning_rate": 9.897580401401722e-07, "loss": 0.4726, "step": 690 }, { "epoch": 0.11148757663762504, "grad_norm": 1.7746791442147758, "learning_rate": 9.897042501670205e-07, "loss": 0.6295, "step": 691 }, { "epoch": 0.11164891900613101, "grad_norm": 1.8157334260852476, "learning_rate": 9.896503207825598e-07, "loss": 0.6934, "step": 692 }, { "epoch": 0.11181026137463698, "grad_norm": 2.0512162392096935, "learning_rate": 9.895962520021425e-07, "loss": 0.7646, "step": 693 }, { "epoch": 0.11197160374314295, "grad_norm": 2.492794238049088, "learning_rate": 9.895420438411615e-07, "loss": 0.5623, "step": 694 }, { "epoch": 0.11213294611164892, "grad_norm": 2.0234753225908224, "learning_rate": 9.894876963150487e-07, "loss": 0.6312, "step": 695 }, { "epoch": 0.1122942884801549, "grad_norm": 1.6128443387264357, "learning_rate": 9.894332094392765e-07, "loss": 0.5366, "step": 696 }, { "epoch": 0.11245563084866086, "grad_norm": 2.3620975777337905, "learning_rate": 9.893785832293561e-07, "loss": 0.6534, "step": 697 }, { "epoch": 0.11261697321716682, "grad_norm": 1.4892068222572032, "learning_rate": 9.89323817700839e-07, "loss": 0.6115, "step": 698 }, { "epoch": 0.11277831558567279, "grad_norm": 2.689102050206682, "learning_rate": 9.89268912869316e-07, "loss": 0.543, "step": 699 }, { "epoch": 0.11293965795417876, "grad_norm": 1.6467244435932327, "learning_rate": 9.892138687504175e-07, "loss": 0.4441, "step": 700 }, { "epoch": 0.11310100032268473, "grad_norm": 1.7701196765850589, "learning_rate": 9.891586853598138e-07, "loss": 0.7163, "step": 701 }, { "epoch": 0.1132623426911907, "grad_norm": 1.9190009732703623, "learning_rate": 9.891033627132148e-07, "loss": 0.5179, "step": 702 }, { "epoch": 0.11342368505969667, "grad_norm": 2.44920979426692, "learning_rate": 9.890479008263703e-07, "loss": 0.6695, "step": 703 }, { "epoch": 0.11358502742820265, "grad_norm": 2.922330280267406, "learning_rate": 9.889922997150691e-07, "loss": 0.5722, "step": 704 }, { "epoch": 0.11374636979670862, "grad_norm": 1.9596340513171808, "learning_rate": 9.8893655939514e-07, "loss": 0.592, "step": 705 }, { "epoch": 0.11390771216521459, "grad_norm": 1.7527336839845, "learning_rate": 9.888806798824515e-07, "loss": 0.6072, "step": 706 }, { "epoch": 0.11406905453372056, "grad_norm": 1.9148088518166406, "learning_rate": 9.888246611929117e-07, "loss": 0.7537, "step": 707 }, { "epoch": 0.11423039690222653, "grad_norm": 1.8484992590137386, "learning_rate": 9.88768503342468e-07, "loss": 0.6938, "step": 708 }, { "epoch": 0.1143917392707325, "grad_norm": 1.6548402652698386, "learning_rate": 9.887122063471081e-07, "loss": 0.7765, "step": 709 }, { "epoch": 0.11455308163923847, "grad_norm": 1.611363192090011, "learning_rate": 9.886557702228586e-07, "loss": 0.5668, "step": 710 }, { "epoch": 0.11471442400774444, "grad_norm": 1.7641356947169313, "learning_rate": 9.885991949857862e-07, "loss": 0.6615, "step": 711 }, { "epoch": 0.11487576637625041, "grad_norm": 2.034408109662104, "learning_rate": 9.885424806519967e-07, "loss": 0.5906, "step": 712 }, { "epoch": 0.11503710874475637, "grad_norm": 1.9957006855037143, "learning_rate": 9.884856272376362e-07, "loss": 0.6013, "step": 713 }, { "epoch": 0.11519845111326234, "grad_norm": 1.692345902204972, "learning_rate": 9.884286347588895e-07, "loss": 0.679, "step": 714 }, { "epoch": 0.11535979348176831, "grad_norm": 1.9073023783453993, "learning_rate": 9.88371503231982e-07, "loss": 0.6689, "step": 715 }, { "epoch": 0.11552113585027428, "grad_norm": 187.29685870630612, "learning_rate": 9.883142326731776e-07, "loss": 0.5866, "step": 716 }, { "epoch": 0.11568247821878025, "grad_norm": 1.4181904264835712, "learning_rate": 9.88256823098781e-07, "loss": 0.565, "step": 717 }, { "epoch": 0.11584382058728622, "grad_norm": 1.9683072636832362, "learning_rate": 9.881992745251355e-07, "loss": 0.6129, "step": 718 }, { "epoch": 0.11600516295579219, "grad_norm": 2.2580564162552936, "learning_rate": 9.881415869686243e-07, "loss": 0.6993, "step": 719 }, { "epoch": 0.11616650532429816, "grad_norm": 1.5235087842647352, "learning_rate": 9.880837604456703e-07, "loss": 0.4493, "step": 720 }, { "epoch": 0.11632784769280413, "grad_norm": 1.8588861415853122, "learning_rate": 9.880257949727355e-07, "loss": 0.7389, "step": 721 }, { "epoch": 0.1164891900613101, "grad_norm": 2.423112417783188, "learning_rate": 9.87967690566322e-07, "loss": 0.7125, "step": 722 }, { "epoch": 0.11665053242981607, "grad_norm": 2.4528392455234163, "learning_rate": 9.879094472429712e-07, "loss": 0.6418, "step": 723 }, { "epoch": 0.11681187479832204, "grad_norm": 1.8040666709599267, "learning_rate": 9.878510650192642e-07, "loss": 0.676, "step": 724 }, { "epoch": 0.11697321716682801, "grad_norm": 1.3918087667834573, "learning_rate": 9.877925439118215e-07, "loss": 0.4474, "step": 725 }, { "epoch": 0.11713455953533398, "grad_norm": 2.1891161806548287, "learning_rate": 9.87733883937303e-07, "loss": 0.4887, "step": 726 }, { "epoch": 0.11729590190383996, "grad_norm": 1.921663288147673, "learning_rate": 9.876750851124085e-07, "loss": 0.7281, "step": 727 }, { "epoch": 0.11745724427234591, "grad_norm": 1.5564774220682822, "learning_rate": 9.87616147453877e-07, "loss": 0.5767, "step": 728 }, { "epoch": 0.11761858664085188, "grad_norm": 1.631686538806471, "learning_rate": 9.875570709784872e-07, "loss": 0.5462, "step": 729 }, { "epoch": 0.11777992900935785, "grad_norm": 1.8678765083107267, "learning_rate": 9.87497855703057e-07, "loss": 0.4637, "step": 730 }, { "epoch": 0.11794127137786382, "grad_norm": 1.9674491823412403, "learning_rate": 9.874385016444443e-07, "loss": 0.5366, "step": 731 }, { "epoch": 0.1181026137463698, "grad_norm": 1.673128745324578, "learning_rate": 9.873790088195467e-07, "loss": 0.6247, "step": 732 }, { "epoch": 0.11826395611487577, "grad_norm": 1.7555555307747777, "learning_rate": 9.873193772453003e-07, "loss": 0.5988, "step": 733 }, { "epoch": 0.11842529848338174, "grad_norm": 2.4105771283081188, "learning_rate": 9.872596069386816e-07, "loss": 0.6292, "step": 734 }, { "epoch": 0.1185866408518877, "grad_norm": 1.7615278525444658, "learning_rate": 9.871996979167062e-07, "loss": 0.7101, "step": 735 }, { "epoch": 0.11874798322039368, "grad_norm": 1.4059115532394726, "learning_rate": 9.871396501964291e-07, "loss": 0.6108, "step": 736 }, { "epoch": 0.11890932558889965, "grad_norm": 1.5150291608767419, "learning_rate": 9.870794637949455e-07, "loss": 0.3782, "step": 737 }, { "epoch": 0.11907066795740562, "grad_norm": 1.607881908938981, "learning_rate": 9.87019138729389e-07, "loss": 0.5519, "step": 738 }, { "epoch": 0.11923201032591159, "grad_norm": 1.8449313371695542, "learning_rate": 9.869586750169334e-07, "loss": 0.7269, "step": 739 }, { "epoch": 0.11939335269441756, "grad_norm": 2.397497036228993, "learning_rate": 9.868980726747918e-07, "loss": 0.654, "step": 740 }, { "epoch": 0.11955469506292353, "grad_norm": 2.2211786866426584, "learning_rate": 9.868373317202167e-07, "loss": 0.5752, "step": 741 }, { "epoch": 0.11971603743142949, "grad_norm": 1.565824329227845, "learning_rate": 9.867764521705005e-07, "loss": 0.6687, "step": 742 }, { "epoch": 0.11987737979993546, "grad_norm": 1.5739565647535236, "learning_rate": 9.86715434042974e-07, "loss": 0.6026, "step": 743 }, { "epoch": 0.12003872216844143, "grad_norm": 6.92119076621317, "learning_rate": 9.86654277355009e-07, "loss": 0.5184, "step": 744 }, { "epoch": 0.1202000645369474, "grad_norm": 1.8491827242089456, "learning_rate": 9.86592982124015e-07, "loss": 0.5306, "step": 745 }, { "epoch": 0.12036140690545337, "grad_norm": 1.792544379117219, "learning_rate": 9.865315483674423e-07, "loss": 0.6857, "step": 746 }, { "epoch": 0.12052274927395934, "grad_norm": 2.4694827959906105, "learning_rate": 9.8646997610278e-07, "loss": 0.6197, "step": 747 }, { "epoch": 0.12068409164246531, "grad_norm": 1.8120231658894685, "learning_rate": 9.86408265347557e-07, "loss": 0.7493, "step": 748 }, { "epoch": 0.12084543401097128, "grad_norm": 2.0368934290129186, "learning_rate": 9.86346416119341e-07, "loss": 0.7133, "step": 749 }, { "epoch": 0.12100677637947725, "grad_norm": 1.7984997245569896, "learning_rate": 9.862844284357398e-07, "loss": 0.6091, "step": 750 }, { "epoch": 0.12116811874798322, "grad_norm": 1.7203612664540777, "learning_rate": 9.862223023144004e-07, "loss": 0.5054, "step": 751 }, { "epoch": 0.12132946111648919, "grad_norm": 2.3337244886690613, "learning_rate": 9.86160037773009e-07, "loss": 0.7924, "step": 752 }, { "epoch": 0.12149080348499516, "grad_norm": 2.407073437383867, "learning_rate": 9.860976348292915e-07, "loss": 0.7704, "step": 753 }, { "epoch": 0.12165214585350113, "grad_norm": 2.4119782076241805, "learning_rate": 9.860350935010129e-07, "loss": 0.7674, "step": 754 }, { "epoch": 0.1218134882220071, "grad_norm": 1.7031445983319058, "learning_rate": 9.85972413805978e-07, "loss": 0.7919, "step": 755 }, { "epoch": 0.12197483059051308, "grad_norm": 1.9788674647057831, "learning_rate": 9.859095957620306e-07, "loss": 0.8132, "step": 756 }, { "epoch": 0.12213617295901903, "grad_norm": 1.936313635098725, "learning_rate": 9.858466393870539e-07, "loss": 0.6157, "step": 757 }, { "epoch": 0.122297515327525, "grad_norm": 1.5966192858175692, "learning_rate": 9.857835446989707e-07, "loss": 0.8219, "step": 758 }, { "epoch": 0.12245885769603097, "grad_norm": 20.985778352646495, "learning_rate": 9.857203117157433e-07, "loss": 0.68, "step": 759 }, { "epoch": 0.12262020006453694, "grad_norm": 2.0675162379078067, "learning_rate": 9.85656940455373e-07, "loss": 0.5453, "step": 760 }, { "epoch": 0.12278154243304291, "grad_norm": 2.15125791434074, "learning_rate": 9.855934309359005e-07, "loss": 0.5922, "step": 761 }, { "epoch": 0.12294288480154889, "grad_norm": 1.8514322484554295, "learning_rate": 9.855297831754062e-07, "loss": 0.7415, "step": 762 }, { "epoch": 0.12310422717005486, "grad_norm": 1.9804777186564984, "learning_rate": 9.854659971920096e-07, "loss": 0.6432, "step": 763 }, { "epoch": 0.12326556953856083, "grad_norm": 1.6146570578529942, "learning_rate": 9.854020730038694e-07, "loss": 0.5802, "step": 764 }, { "epoch": 0.1234269119070668, "grad_norm": 1.7296970274127852, "learning_rate": 9.853380106291843e-07, "loss": 0.6314, "step": 765 }, { "epoch": 0.12358825427557277, "grad_norm": 1.808077514003788, "learning_rate": 9.852738100861915e-07, "loss": 0.6303, "step": 766 }, { "epoch": 0.12374959664407874, "grad_norm": 1.6486953605985935, "learning_rate": 9.852094713931678e-07, "loss": 0.6058, "step": 767 }, { "epoch": 0.12391093901258471, "grad_norm": 1.8363371069225334, "learning_rate": 9.851449945684299e-07, "loss": 0.474, "step": 768 }, { "epoch": 0.12407228138109068, "grad_norm": 2.2266326357854527, "learning_rate": 9.850803796303329e-07, "loss": 0.6633, "step": 769 }, { "epoch": 0.12423362374959665, "grad_norm": 2.0017576838670195, "learning_rate": 9.85015626597272e-07, "loss": 0.7477, "step": 770 }, { "epoch": 0.12439496611810262, "grad_norm": 1.6696679351648493, "learning_rate": 9.849507354876811e-07, "loss": 0.6248, "step": 771 }, { "epoch": 0.12455630848660858, "grad_norm": 1.595382079158584, "learning_rate": 9.848857063200342e-07, "loss": 0.6688, "step": 772 }, { "epoch": 0.12471765085511455, "grad_norm": 1.9807369494496332, "learning_rate": 9.848205391128436e-07, "loss": 0.6606, "step": 773 }, { "epoch": 0.12487899322362052, "grad_norm": 2.2315209574158676, "learning_rate": 9.847552338846615e-07, "loss": 0.7644, "step": 774 }, { "epoch": 0.1250403355921265, "grad_norm": 2.304315569333443, "learning_rate": 9.846897906540796e-07, "loss": 0.6857, "step": 775 }, { "epoch": 0.12520167796063247, "grad_norm": 2.7400172848399924, "learning_rate": 9.846242094397284e-07, "loss": 0.4975, "step": 776 }, { "epoch": 0.12536302032913843, "grad_norm": 2.1805980243583294, "learning_rate": 9.845584902602776e-07, "loss": 0.4843, "step": 777 }, { "epoch": 0.12552436269764441, "grad_norm": 1.537497717382706, "learning_rate": 9.84492633134437e-07, "loss": 0.6462, "step": 778 }, { "epoch": 0.12568570506615037, "grad_norm": 2.080975864444583, "learning_rate": 9.844266380809547e-07, "loss": 0.6454, "step": 779 }, { "epoch": 0.12584704743465633, "grad_norm": 1.8675914630680888, "learning_rate": 9.843605051186187e-07, "loss": 0.7526, "step": 780 }, { "epoch": 0.1260083898031623, "grad_norm": 2.2618004449772244, "learning_rate": 9.84294234266256e-07, "loss": 0.7131, "step": 781 }, { "epoch": 0.12616973217166827, "grad_norm": 2.143453914558904, "learning_rate": 9.842278255427326e-07, "loss": 0.5433, "step": 782 }, { "epoch": 0.12633107454017425, "grad_norm": 2.2383368520171723, "learning_rate": 9.841612789669544e-07, "loss": 0.7548, "step": 783 }, { "epoch": 0.1264924169086802, "grad_norm": 1.9101157466908192, "learning_rate": 9.84094594557866e-07, "loss": 0.5891, "step": 784 }, { "epoch": 0.1266537592771862, "grad_norm": 1.9450481189178856, "learning_rate": 9.840277723344516e-07, "loss": 0.708, "step": 785 }, { "epoch": 0.12681510164569215, "grad_norm": 1.9566700006139233, "learning_rate": 9.839608123157347e-07, "loss": 0.674, "step": 786 }, { "epoch": 0.12697644401419814, "grad_norm": 1.9312077144203241, "learning_rate": 9.838937145207771e-07, "loss": 0.5254, "step": 787 }, { "epoch": 0.1271377863827041, "grad_norm": 1.8970273278801228, "learning_rate": 9.838264789686808e-07, "loss": 0.5753, "step": 788 }, { "epoch": 0.12729912875121008, "grad_norm": 1.544791697191104, "learning_rate": 9.83759105678587e-07, "loss": 0.4579, "step": 789 }, { "epoch": 0.12746047111971603, "grad_norm": 2.3644313567360236, "learning_rate": 9.836915946696757e-07, "loss": 0.5894, "step": 790 }, { "epoch": 0.12762181348822202, "grad_norm": 1.824468101433789, "learning_rate": 9.836239459611661e-07, "loss": 0.6556, "step": 791 }, { "epoch": 0.12778315585672798, "grad_norm": 1.9378156712467185, "learning_rate": 9.83556159572317e-07, "loss": 0.6049, "step": 792 }, { "epoch": 0.12794449822523396, "grad_norm": 2.137856245191374, "learning_rate": 9.83488235522426e-07, "loss": 0.6625, "step": 793 }, { "epoch": 0.12810584059373992, "grad_norm": 2.257614074224376, "learning_rate": 9.8342017383083e-07, "loss": 0.6688, "step": 794 }, { "epoch": 0.12826718296224587, "grad_norm": 2.1321992656277473, "learning_rate": 9.833519745169053e-07, "loss": 0.4396, "step": 795 }, { "epoch": 0.12842852533075186, "grad_norm": 2.118290350012955, "learning_rate": 9.83283637600067e-07, "loss": 0.4498, "step": 796 }, { "epoch": 0.12858986769925781, "grad_norm": 1.8095054223684803, "learning_rate": 9.832151630997697e-07, "loss": 0.6265, "step": 797 }, { "epoch": 0.1287512100677638, "grad_norm": 1.9144147276360075, "learning_rate": 9.831465510355067e-07, "loss": 0.7118, "step": 798 }, { "epoch": 0.12891255243626976, "grad_norm": 2.2305819454297344, "learning_rate": 9.830778014268114e-07, "loss": 0.7889, "step": 799 }, { "epoch": 0.12907389480477574, "grad_norm": 1.87921476309634, "learning_rate": 9.830089142932556e-07, "loss": 0.6947, "step": 800 }, { "epoch": 0.1292352371732817, "grad_norm": 2.3431580876913904, "learning_rate": 9.829398896544502e-07, "loss": 0.7502, "step": 801 }, { "epoch": 0.12939657954178768, "grad_norm": 1.6982268145733543, "learning_rate": 9.828707275300452e-07, "loss": 0.6813, "step": 802 }, { "epoch": 0.12955792191029364, "grad_norm": 2.097900831004296, "learning_rate": 9.828014279397307e-07, "loss": 0.6972, "step": 803 }, { "epoch": 0.12971926427879962, "grad_norm": 1.6967507164052786, "learning_rate": 9.827319909032348e-07, "loss": 0.513, "step": 804 }, { "epoch": 0.12988060664730558, "grad_norm": 1.9167072319492593, "learning_rate": 9.826624164403252e-07, "loss": 0.6536, "step": 805 }, { "epoch": 0.13004194901581156, "grad_norm": 1.4691584404714892, "learning_rate": 9.825927045708086e-07, "loss": 0.5793, "step": 806 }, { "epoch": 0.13020329138431752, "grad_norm": 1.478854544935263, "learning_rate": 9.825228553145312e-07, "loss": 0.4014, "step": 807 }, { "epoch": 0.13036463375282348, "grad_norm": 3.2981903200402236, "learning_rate": 9.824528686913778e-07, "loss": 0.7133, "step": 808 }, { "epoch": 0.13052597612132946, "grad_norm": 1.743426579388038, "learning_rate": 9.823827447212725e-07, "loss": 0.5499, "step": 809 }, { "epoch": 0.13068731848983542, "grad_norm": 1.9548797659231367, "learning_rate": 9.823124834241786e-07, "loss": 0.736, "step": 810 }, { "epoch": 0.1308486608583414, "grad_norm": 2.165598740631797, "learning_rate": 9.822420848200984e-07, "loss": 0.8363, "step": 811 }, { "epoch": 0.13101000322684736, "grad_norm": 1.5147690076326408, "learning_rate": 9.821715489290734e-07, "loss": 0.55, "step": 812 }, { "epoch": 0.13117134559535334, "grad_norm": 1.5162801997737998, "learning_rate": 9.821008757711839e-07, "loss": 0.559, "step": 813 }, { "epoch": 0.1313326879638593, "grad_norm": 1.6948075487377028, "learning_rate": 9.820300653665495e-07, "loss": 0.5306, "step": 814 }, { "epoch": 0.13149403033236529, "grad_norm": 2.047939926227517, "learning_rate": 9.81959117735329e-07, "loss": 0.6244, "step": 815 }, { "epoch": 0.13165537270087124, "grad_norm": 2.062203125828718, "learning_rate": 9.8188803289772e-07, "loss": 0.588, "step": 816 }, { "epoch": 0.13181671506937723, "grad_norm": 2.8542093612559083, "learning_rate": 9.818168108739591e-07, "loss": 0.8013, "step": 817 }, { "epoch": 0.13197805743788318, "grad_norm": 1.9308733057333356, "learning_rate": 9.817454516843224e-07, "loss": 0.6202, "step": 818 }, { "epoch": 0.13213939980638917, "grad_norm": 1.7942765396832072, "learning_rate": 9.816739553491247e-07, "loss": 0.6224, "step": 819 }, { "epoch": 0.13230074217489513, "grad_norm": 1.8813433475083674, "learning_rate": 9.816023218887198e-07, "loss": 0.7018, "step": 820 }, { "epoch": 0.1324620845434011, "grad_norm": 1.4063642667334195, "learning_rate": 9.815305513235007e-07, "loss": 0.6349, "step": 821 }, { "epoch": 0.13262342691190707, "grad_norm": 1.4267852174814997, "learning_rate": 9.814586436738997e-07, "loss": 0.5596, "step": 822 }, { "epoch": 0.13278476928041302, "grad_norm": 1.883573639428883, "learning_rate": 9.81386598960387e-07, "loss": 0.5479, "step": 823 }, { "epoch": 0.132946111648919, "grad_norm": 2.4407123037199674, "learning_rate": 9.813144172034735e-07, "loss": 0.7679, "step": 824 }, { "epoch": 0.13310745401742496, "grad_norm": 1.5133199565705164, "learning_rate": 9.81242098423708e-07, "loss": 0.6406, "step": 825 }, { "epoch": 0.13326879638593095, "grad_norm": 1.7690698556859972, "learning_rate": 9.811696426416783e-07, "loss": 0.5404, "step": 826 }, { "epoch": 0.1334301387544369, "grad_norm": 2.0687881327914917, "learning_rate": 9.810970498780113e-07, "loss": 0.5636, "step": 827 }, { "epoch": 0.1335914811229429, "grad_norm": 2.00063063692528, "learning_rate": 9.810243201533737e-07, "loss": 0.451, "step": 828 }, { "epoch": 0.13375282349144885, "grad_norm": 2.3042884610519274, "learning_rate": 9.8095145348847e-07, "loss": 0.8098, "step": 829 }, { "epoch": 0.13391416585995483, "grad_norm": 2.1027058607418434, "learning_rate": 9.808784499040445e-07, "loss": 0.8047, "step": 830 }, { "epoch": 0.1340755082284608, "grad_norm": 1.8366735403321146, "learning_rate": 9.8080530942088e-07, "loss": 0.4741, "step": 831 }, { "epoch": 0.13423685059696677, "grad_norm": 1.9585856890579096, "learning_rate": 9.807320320597985e-07, "loss": 0.6133, "step": 832 }, { "epoch": 0.13439819296547273, "grad_norm": 2.7322902034203445, "learning_rate": 9.806586178416612e-07, "loss": 0.5059, "step": 833 }, { "epoch": 0.1345595353339787, "grad_norm": 1.982450498176983, "learning_rate": 9.805850667873678e-07, "loss": 0.7385, "step": 834 }, { "epoch": 0.13472087770248467, "grad_norm": 1.995286274306714, "learning_rate": 9.805113789178571e-07, "loss": 0.6573, "step": 835 }, { "epoch": 0.13488222007099065, "grad_norm": 2.4558116997407886, "learning_rate": 9.804375542541072e-07, "loss": 0.5702, "step": 836 }, { "epoch": 0.1350435624394966, "grad_norm": 2.225795680729369, "learning_rate": 9.803635928171346e-07, "loss": 0.8304, "step": 837 }, { "epoch": 0.13520490480800257, "grad_norm": 1.4217953502437872, "learning_rate": 9.802894946279948e-07, "loss": 0.6195, "step": 838 }, { "epoch": 0.13536624717650855, "grad_norm": 1.7083769846976053, "learning_rate": 9.802152597077828e-07, "loss": 0.5123, "step": 839 }, { "epoch": 0.1355275895450145, "grad_norm": 2.1035900899728732, "learning_rate": 9.80140888077632e-07, "loss": 0.5158, "step": 840 }, { "epoch": 0.1356889319135205, "grad_norm": 1.8078444310750288, "learning_rate": 9.80066379758715e-07, "loss": 0.8009, "step": 841 }, { "epoch": 0.13585027428202645, "grad_norm": 2.872200971664563, "learning_rate": 9.79991734772243e-07, "loss": 0.8461, "step": 842 }, { "epoch": 0.13601161665053244, "grad_norm": 1.6767722447462394, "learning_rate": 9.799169531394662e-07, "loss": 0.6294, "step": 843 }, { "epoch": 0.1361729590190384, "grad_norm": 2.9165475185217558, "learning_rate": 9.79842034881674e-07, "loss": 0.7588, "step": 844 }, { "epoch": 0.13633430138754438, "grad_norm": 2.051070013152291, "learning_rate": 9.797669800201943e-07, "loss": 0.5165, "step": 845 }, { "epoch": 0.13649564375605033, "grad_norm": 1.6413107755729013, "learning_rate": 9.796917885763945e-07, "loss": 0.6482, "step": 846 }, { "epoch": 0.13665698612455632, "grad_norm": 1.6466042688776599, "learning_rate": 9.7961646057168e-07, "loss": 0.5839, "step": 847 }, { "epoch": 0.13681832849306227, "grad_norm": 5.968917764433051, "learning_rate": 9.795409960274955e-07, "loss": 0.5774, "step": 848 }, { "epoch": 0.13697967086156826, "grad_norm": 1.8040888069860728, "learning_rate": 9.794653949653248e-07, "loss": 0.5911, "step": 849 }, { "epoch": 0.13714101323007422, "grad_norm": 2.279015374145607, "learning_rate": 9.793896574066905e-07, "loss": 0.73, "step": 850 }, { "epoch": 0.1373023555985802, "grad_norm": 2.159495026582934, "learning_rate": 9.793137833731536e-07, "loss": 0.5262, "step": 851 }, { "epoch": 0.13746369796708616, "grad_norm": 1.8785072585070497, "learning_rate": 9.792377728863144e-07, "loss": 0.7481, "step": 852 }, { "epoch": 0.1376250403355921, "grad_norm": 1.663195675305482, "learning_rate": 9.791616259678121e-07, "loss": 0.5274, "step": 853 }, { "epoch": 0.1377863827040981, "grad_norm": 1.5921789167208382, "learning_rate": 9.790853426393244e-07, "loss": 0.6834, "step": 854 }, { "epoch": 0.13794772507260405, "grad_norm": 1.8515276684281432, "learning_rate": 9.79008922922568e-07, "loss": 0.683, "step": 855 }, { "epoch": 0.13810906744111004, "grad_norm": 2.171076017007409, "learning_rate": 9.789323668392984e-07, "loss": 0.6325, "step": 856 }, { "epoch": 0.138270409809616, "grad_norm": 1.6035284118514228, "learning_rate": 9.788556744113099e-07, "loss": 0.7193, "step": 857 }, { "epoch": 0.13843175217812198, "grad_norm": 1.398721932409981, "learning_rate": 9.787788456604357e-07, "loss": 0.5545, "step": 858 }, { "epoch": 0.13859309454662794, "grad_norm": 1.641825427529313, "learning_rate": 9.787018806085482e-07, "loss": 0.4378, "step": 859 }, { "epoch": 0.13875443691513392, "grad_norm": 1.8051516113644297, "learning_rate": 9.786247792775573e-07, "loss": 0.5739, "step": 860 }, { "epoch": 0.13891577928363988, "grad_norm": 1.93621592886336, "learning_rate": 9.785475416894133e-07, "loss": 0.6352, "step": 861 }, { "epoch": 0.13907712165214586, "grad_norm": 1.774166142344304, "learning_rate": 9.784701678661044e-07, "loss": 0.4171, "step": 862 }, { "epoch": 0.13923846402065182, "grad_norm": 2.336723578118706, "learning_rate": 9.783926578296575e-07, "loss": 0.818, "step": 863 }, { "epoch": 0.1393998063891578, "grad_norm": 1.7691178333636217, "learning_rate": 9.783150116021387e-07, "loss": 0.5529, "step": 864 }, { "epoch": 0.13956114875766376, "grad_norm": 1.4312346378484804, "learning_rate": 9.782372292056529e-07, "loss": 0.6103, "step": 865 }, { "epoch": 0.13972249112616975, "grad_norm": 2.3099514088329722, "learning_rate": 9.781593106623431e-07, "loss": 0.6882, "step": 866 }, { "epoch": 0.1398838334946757, "grad_norm": 1.7739899568843294, "learning_rate": 9.780812559943918e-07, "loss": 0.6378, "step": 867 }, { "epoch": 0.14004517586318166, "grad_norm": 2.4341959935238773, "learning_rate": 9.780030652240199e-07, "loss": 0.752, "step": 868 }, { "epoch": 0.14020651823168764, "grad_norm": 1.7959146338142695, "learning_rate": 9.779247383734873e-07, "loss": 0.7994, "step": 869 }, { "epoch": 0.1403678606001936, "grad_norm": 1.9709041254772186, "learning_rate": 9.77846275465092e-07, "loss": 0.5692, "step": 870 }, { "epoch": 0.14052920296869958, "grad_norm": 2.011691491396028, "learning_rate": 9.777676765211718e-07, "loss": 0.6291, "step": 871 }, { "epoch": 0.14069054533720554, "grad_norm": 1.661574603788734, "learning_rate": 9.776889415641021e-07, "loss": 0.5356, "step": 872 }, { "epoch": 0.14085188770571153, "grad_norm": 1.5814834211350661, "learning_rate": 9.776100706162976e-07, "loss": 0.6027, "step": 873 }, { "epoch": 0.14101323007421748, "grad_norm": 1.4090917907499536, "learning_rate": 9.775310637002122e-07, "loss": 0.516, "step": 874 }, { "epoch": 0.14117457244272347, "grad_norm": 2.382320866129917, "learning_rate": 9.774519208383373e-07, "loss": 0.5544, "step": 875 }, { "epoch": 0.14133591481122942, "grad_norm": 1.2836689328576742, "learning_rate": 9.77372642053204e-07, "loss": 0.5617, "step": 876 }, { "epoch": 0.1414972571797354, "grad_norm": 1.627914456100011, "learning_rate": 9.772932273673815e-07, "loss": 0.5718, "step": 877 }, { "epoch": 0.14165859954824137, "grad_norm": 1.6212524336397351, "learning_rate": 9.772136768034785e-07, "loss": 0.7282, "step": 878 }, { "epoch": 0.14181994191674735, "grad_norm": 1.7174918685120903, "learning_rate": 9.77133990384141e-07, "loss": 0.5837, "step": 879 }, { "epoch": 0.1419812842852533, "grad_norm": 1.8387434275060388, "learning_rate": 9.770541681320553e-07, "loss": 0.6776, "step": 880 }, { "epoch": 0.1421426266537593, "grad_norm": 1.6298928326206747, "learning_rate": 9.76974210069945e-07, "loss": 0.4727, "step": 881 }, { "epoch": 0.14230396902226525, "grad_norm": 1.636817874501534, "learning_rate": 9.768941162205733e-07, "loss": 0.4727, "step": 882 }, { "epoch": 0.1424653113907712, "grad_norm": 2.3725009619949047, "learning_rate": 9.768138866067414e-07, "loss": 0.6369, "step": 883 }, { "epoch": 0.1426266537592772, "grad_norm": 1.6509303186777802, "learning_rate": 9.767335212512897e-07, "loss": 0.4354, "step": 884 }, { "epoch": 0.14278799612778315, "grad_norm": 1.7001259672954359, "learning_rate": 9.766530201770967e-07, "loss": 0.7258, "step": 885 }, { "epoch": 0.14294933849628913, "grad_norm": 1.5584637962985117, "learning_rate": 9.765723834070804e-07, "loss": 0.8039, "step": 886 }, { "epoch": 0.1431106808647951, "grad_norm": 1.6315059271098902, "learning_rate": 9.764916109641964e-07, "loss": 0.7091, "step": 887 }, { "epoch": 0.14327202323330107, "grad_norm": 3.0387182359435787, "learning_rate": 9.76410702871439e-07, "loss": 0.6375, "step": 888 }, { "epoch": 0.14343336560180703, "grad_norm": 1.6766949632404398, "learning_rate": 9.763296591518425e-07, "loss": 0.571, "step": 889 }, { "epoch": 0.143594707970313, "grad_norm": 1.6370997019695663, "learning_rate": 9.76248479828478e-07, "loss": 0.5162, "step": 890 }, { "epoch": 0.14375605033881897, "grad_norm": 1.8078314408453326, "learning_rate": 9.761671649244562e-07, "loss": 0.6979, "step": 891 }, { "epoch": 0.14391739270732495, "grad_norm": 2.0964013009468907, "learning_rate": 9.760857144629263e-07, "loss": 0.7345, "step": 892 }, { "epoch": 0.1440787350758309, "grad_norm": 1.7748314172984745, "learning_rate": 9.76004128467076e-07, "loss": 0.4254, "step": 893 }, { "epoch": 0.1442400774443369, "grad_norm": 2.4723725600614137, "learning_rate": 9.759224069601316e-07, "loss": 0.6817, "step": 894 }, { "epoch": 0.14440141981284285, "grad_norm": 2.0715890079999957, "learning_rate": 9.758405499653578e-07, "loss": 0.4768, "step": 895 }, { "epoch": 0.1445627621813488, "grad_norm": 2.5769832973035154, "learning_rate": 9.757585575060583e-07, "loss": 0.4619, "step": 896 }, { "epoch": 0.1447241045498548, "grad_norm": 1.7326074477175586, "learning_rate": 9.756764296055747e-07, "loss": 0.7252, "step": 897 }, { "epoch": 0.14488544691836075, "grad_norm": 1.7803837192211815, "learning_rate": 9.755941662872882e-07, "loss": 0.5042, "step": 898 }, { "epoch": 0.14504678928686673, "grad_norm": 2.1320960549765307, "learning_rate": 9.755117675746172e-07, "loss": 0.5714, "step": 899 }, { "epoch": 0.1452081316553727, "grad_norm": 1.6487772803836933, "learning_rate": 9.7542923349102e-07, "loss": 0.6455, "step": 900 }, { "epoch": 0.14536947402387868, "grad_norm": 1.7419415629644293, "learning_rate": 9.75346564059992e-07, "loss": 0.7978, "step": 901 }, { "epoch": 0.14553081639238463, "grad_norm": 1.9203090956357447, "learning_rate": 9.752637593050688e-07, "loss": 0.8312, "step": 902 }, { "epoch": 0.14569215876089062, "grad_norm": 1.3394292073354672, "learning_rate": 9.75180819249823e-07, "loss": 0.6093, "step": 903 }, { "epoch": 0.14585350112939657, "grad_norm": 2.0013229048040233, "learning_rate": 9.750977439178667e-07, "loss": 0.4877, "step": 904 }, { "epoch": 0.14601484349790256, "grad_norm": 1.9813181844344387, "learning_rate": 9.7501453333285e-07, "loss": 0.6632, "step": 905 }, { "epoch": 0.14617618586640851, "grad_norm": 1.7843448455988369, "learning_rate": 9.749311875184618e-07, "loss": 0.4896, "step": 906 }, { "epoch": 0.1463375282349145, "grad_norm": 2.1130859510395483, "learning_rate": 9.748477064984294e-07, "loss": 0.7032, "step": 907 }, { "epoch": 0.14649887060342046, "grad_norm": 2.4871289805614194, "learning_rate": 9.747640902965182e-07, "loss": 0.8574, "step": 908 }, { "epoch": 0.14666021297192644, "grad_norm": 2.68692524996308, "learning_rate": 9.74680338936533e-07, "loss": 0.7475, "step": 909 }, { "epoch": 0.1468215553404324, "grad_norm": 3.384687859752947, "learning_rate": 9.745964524423164e-07, "loss": 0.7593, "step": 910 }, { "epoch": 0.14698289770893835, "grad_norm": 2.060467614552312, "learning_rate": 9.745124308377492e-07, "loss": 0.5846, "step": 911 }, { "epoch": 0.14714424007744434, "grad_norm": 1.7601336942091865, "learning_rate": 9.744282741467516e-07, "loss": 0.4962, "step": 912 }, { "epoch": 0.1473055824459503, "grad_norm": 1.801841109503242, "learning_rate": 9.743439823932812e-07, "loss": 0.7869, "step": 913 }, { "epoch": 0.14746692481445628, "grad_norm": 2.2500199211086427, "learning_rate": 9.742595556013352e-07, "loss": 0.6069, "step": 914 }, { "epoch": 0.14762826718296224, "grad_norm": 1.4815523313870582, "learning_rate": 9.741749937949482e-07, "loss": 0.5194, "step": 915 }, { "epoch": 0.14778960955146822, "grad_norm": 1.2915585482887861, "learning_rate": 9.740902969981935e-07, "loss": 0.5393, "step": 916 }, { "epoch": 0.14795095191997418, "grad_norm": 1.5968574448309212, "learning_rate": 9.740054652351833e-07, "loss": 0.3471, "step": 917 }, { "epoch": 0.14811229428848016, "grad_norm": 1.9068552525940985, "learning_rate": 9.739204985300679e-07, "loss": 0.5308, "step": 918 }, { "epoch": 0.14827363665698612, "grad_norm": 1.8582623783244838, "learning_rate": 9.738353969070359e-07, "loss": 0.69, "step": 919 }, { "epoch": 0.1484349790254921, "grad_norm": 1.7656467402647311, "learning_rate": 9.737501603903144e-07, "loss": 0.5893, "step": 920 }, { "epoch": 0.14859632139399806, "grad_norm": 1.4673118043025253, "learning_rate": 9.736647890041688e-07, "loss": 0.706, "step": 921 }, { "epoch": 0.14875766376250404, "grad_norm": 1.4458415120064807, "learning_rate": 9.735792827729035e-07, "loss": 0.7019, "step": 922 }, { "epoch": 0.14891900613101, "grad_norm": 2.055378496717942, "learning_rate": 9.734936417208604e-07, "loss": 0.672, "step": 923 }, { "epoch": 0.14908034849951599, "grad_norm": 1.715713037340879, "learning_rate": 9.734078658724203e-07, "loss": 0.4921, "step": 924 }, { "epoch": 0.14924169086802194, "grad_norm": 1.2972522324833065, "learning_rate": 9.733219552520024e-07, "loss": 0.5156, "step": 925 }, { "epoch": 0.1494030332365279, "grad_norm": 1.897614288808441, "learning_rate": 9.732359098840642e-07, "loss": 0.6929, "step": 926 }, { "epoch": 0.14956437560503388, "grad_norm": 2.136766729565862, "learning_rate": 9.73149729793101e-07, "loss": 0.5395, "step": 927 }, { "epoch": 0.14972571797353984, "grad_norm": 1.4813836725496305, "learning_rate": 9.730634150036475e-07, "loss": 0.4644, "step": 928 }, { "epoch": 0.14988706034204582, "grad_norm": 2.007829243037255, "learning_rate": 9.72976965540276e-07, "loss": 0.7234, "step": 929 }, { "epoch": 0.15004840271055178, "grad_norm": 1.2924292323293871, "learning_rate": 9.728903814275972e-07, "loss": 0.6184, "step": 930 }, { "epoch": 0.15020974507905777, "grad_norm": 1.5787542618235664, "learning_rate": 9.728036626902607e-07, "loss": 0.6124, "step": 931 }, { "epoch": 0.15037108744756372, "grad_norm": 1.8997848815607163, "learning_rate": 9.727168093529535e-07, "loss": 0.5469, "step": 932 }, { "epoch": 0.1505324298160697, "grad_norm": 1.9688228412050333, "learning_rate": 9.726298214404015e-07, "loss": 0.6531, "step": 933 }, { "epoch": 0.15069377218457566, "grad_norm": 1.827408609466462, "learning_rate": 9.725426989773692e-07, "loss": 0.4651, "step": 934 }, { "epoch": 0.15085511455308165, "grad_norm": 1.9487264925142658, "learning_rate": 9.724554419886586e-07, "loss": 0.5691, "step": 935 }, { "epoch": 0.1510164569215876, "grad_norm": 2.147468931175436, "learning_rate": 9.723680504991107e-07, "loss": 0.5423, "step": 936 }, { "epoch": 0.1511777992900936, "grad_norm": 1.8566647178712727, "learning_rate": 9.722805245336044e-07, "loss": 0.6931, "step": 937 }, { "epoch": 0.15133914165859955, "grad_norm": 2.0017788843762974, "learning_rate": 9.721928641170571e-07, "loss": 0.5764, "step": 938 }, { "epoch": 0.15150048402710553, "grad_norm": 1.618974002966349, "learning_rate": 9.721050692744243e-07, "loss": 0.6239, "step": 939 }, { "epoch": 0.1516618263956115, "grad_norm": 2.0129726020854894, "learning_rate": 9.720171400306997e-07, "loss": 0.7072, "step": 940 }, { "epoch": 0.15182316876411744, "grad_norm": 2.0985466969156876, "learning_rate": 9.719290764109158e-07, "loss": 0.6842, "step": 941 }, { "epoch": 0.15198451113262343, "grad_norm": 2.0259611085960416, "learning_rate": 9.718408784401427e-07, "loss": 0.7294, "step": 942 }, { "epoch": 0.15214585350112939, "grad_norm": 1.7557049132778362, "learning_rate": 9.71752546143489e-07, "loss": 0.4888, "step": 943 }, { "epoch": 0.15230719586963537, "grad_norm": 1.3079903649141502, "learning_rate": 9.716640795461016e-07, "loss": 0.4798, "step": 944 }, { "epoch": 0.15246853823814133, "grad_norm": 1.7121702308292153, "learning_rate": 9.715754786731656e-07, "loss": 0.5152, "step": 945 }, { "epoch": 0.1526298806066473, "grad_norm": 1.903437306002186, "learning_rate": 9.714867435499044e-07, "loss": 0.6082, "step": 946 }, { "epoch": 0.15279122297515327, "grad_norm": 1.8769204794816448, "learning_rate": 9.713978742015793e-07, "loss": 0.6623, "step": 947 }, { "epoch": 0.15295256534365925, "grad_norm": 1.8460916423099478, "learning_rate": 9.713088706534903e-07, "loss": 0.4382, "step": 948 }, { "epoch": 0.1531139077121652, "grad_norm": 1.4944942679169257, "learning_rate": 9.712197329309753e-07, "loss": 0.4724, "step": 949 }, { "epoch": 0.1532752500806712, "grad_norm": 2.0136061619748817, "learning_rate": 9.711304610594102e-07, "loss": 0.8187, "step": 950 }, { "epoch": 0.15343659244917715, "grad_norm": 2.0676124095686776, "learning_rate": 9.710410550642096e-07, "loss": 0.5149, "step": 951 }, { "epoch": 0.15359793481768313, "grad_norm": 2.2736278336280957, "learning_rate": 9.70951514970826e-07, "loss": 0.5117, "step": 952 }, { "epoch": 0.1537592771861891, "grad_norm": 2.056144748765212, "learning_rate": 9.7086184080475e-07, "loss": 0.6556, "step": 953 }, { "epoch": 0.15392061955469508, "grad_norm": 1.925405593352625, "learning_rate": 9.707720325915103e-07, "loss": 0.7168, "step": 954 }, { "epoch": 0.15408196192320103, "grad_norm": 2.2029395059183625, "learning_rate": 9.706820903566743e-07, "loss": 0.6947, "step": 955 }, { "epoch": 0.154243304291707, "grad_norm": 2.828558282697771, "learning_rate": 9.70592014125847e-07, "loss": 0.7149, "step": 956 }, { "epoch": 0.15440464666021297, "grad_norm": 1.622430530583237, "learning_rate": 9.705018039246717e-07, "loss": 0.4862, "step": 957 }, { "epoch": 0.15456598902871893, "grad_norm": 1.6390597187960496, "learning_rate": 9.704114597788299e-07, "loss": 0.489, "step": 958 }, { "epoch": 0.15472733139722492, "grad_norm": 2.009753761960858, "learning_rate": 9.703209817140412e-07, "loss": 0.497, "step": 959 }, { "epoch": 0.15488867376573087, "grad_norm": 1.7661627566637574, "learning_rate": 9.702303697560633e-07, "loss": 0.692, "step": 960 }, { "epoch": 0.15505001613423686, "grad_norm": 1.5871794399500705, "learning_rate": 9.70139623930692e-07, "loss": 0.4646, "step": 961 }, { "epoch": 0.1552113585027428, "grad_norm": 1.7882792661563374, "learning_rate": 9.700487442637612e-07, "loss": 0.572, "step": 962 }, { "epoch": 0.1553727008712488, "grad_norm": 1.8987684550088497, "learning_rate": 9.699577307811431e-07, "loss": 0.5798, "step": 963 }, { "epoch": 0.15553404323975475, "grad_norm": 1.9058851924835611, "learning_rate": 9.69866583508748e-07, "loss": 0.4634, "step": 964 }, { "epoch": 0.15569538560826074, "grad_norm": 2.538619252176177, "learning_rate": 9.697753024725237e-07, "loss": 0.5959, "step": 965 }, { "epoch": 0.1558567279767667, "grad_norm": 1.5858457942813184, "learning_rate": 9.696838876984567e-07, "loss": 0.5379, "step": 966 }, { "epoch": 0.15601807034527268, "grad_norm": 1.7904736812759006, "learning_rate": 9.695923392125717e-07, "loss": 0.473, "step": 967 }, { "epoch": 0.15617941271377864, "grad_norm": 2.265077722154881, "learning_rate": 9.695006570409305e-07, "loss": 0.7681, "step": 968 }, { "epoch": 0.15634075508228462, "grad_norm": 3.3093887689032058, "learning_rate": 9.694088412096343e-07, "loss": 0.652, "step": 969 }, { "epoch": 0.15650209745079058, "grad_norm": 2.6174818186597597, "learning_rate": 9.693168917448212e-07, "loss": 0.7295, "step": 970 }, { "epoch": 0.15666343981929653, "grad_norm": 4.240807633764668, "learning_rate": 9.69224808672668e-07, "loss": 0.641, "step": 971 }, { "epoch": 0.15682478218780252, "grad_norm": 1.829707186468401, "learning_rate": 9.691325920193892e-07, "loss": 0.5672, "step": 972 }, { "epoch": 0.15698612455630848, "grad_norm": 1.5343085125038407, "learning_rate": 9.690402418112377e-07, "loss": 0.6667, "step": 973 }, { "epoch": 0.15714746692481446, "grad_norm": 1.7142758482694553, "learning_rate": 9.689477580745041e-07, "loss": 0.5364, "step": 974 }, { "epoch": 0.15730880929332042, "grad_norm": 1.8837965930018936, "learning_rate": 9.68855140835517e-07, "loss": 0.4542, "step": 975 }, { "epoch": 0.1574701516618264, "grad_norm": 1.5750976078124024, "learning_rate": 9.687623901206432e-07, "loss": 0.546, "step": 976 }, { "epoch": 0.15763149403033236, "grad_norm": 1.599247072804849, "learning_rate": 9.686695059562874e-07, "loss": 0.3774, "step": 977 }, { "epoch": 0.15779283639883834, "grad_norm": 1.5806885095570238, "learning_rate": 9.685764883688924e-07, "loss": 0.5132, "step": 978 }, { "epoch": 0.1579541787673443, "grad_norm": 1.9635136920207743, "learning_rate": 9.684833373849385e-07, "loss": 0.7367, "step": 979 }, { "epoch": 0.15811552113585028, "grad_norm": 1.4248420862070403, "learning_rate": 9.683900530309448e-07, "loss": 0.6353, "step": 980 }, { "epoch": 0.15827686350435624, "grad_norm": 2.0398215565520585, "learning_rate": 9.682966353334678e-07, "loss": 0.7192, "step": 981 }, { "epoch": 0.15843820587286223, "grad_norm": 2.1626069720496743, "learning_rate": 9.68203084319102e-07, "loss": 0.6076, "step": 982 }, { "epoch": 0.15859954824136818, "grad_norm": 1.998725723586766, "learning_rate": 9.6810940001448e-07, "loss": 0.557, "step": 983 }, { "epoch": 0.15876089060987417, "grad_norm": 2.3499053489616064, "learning_rate": 9.680155824462723e-07, "loss": 0.5997, "step": 984 }, { "epoch": 0.15892223297838012, "grad_norm": 2.2631513048027028, "learning_rate": 9.679216316411873e-07, "loss": 0.7736, "step": 985 }, { "epoch": 0.15908357534688608, "grad_norm": 1.7379414086907863, "learning_rate": 9.678275476259713e-07, "loss": 0.5416, "step": 986 }, { "epoch": 0.15924491771539206, "grad_norm": 1.5578961257144956, "learning_rate": 9.677333304274086e-07, "loss": 0.6115, "step": 987 }, { "epoch": 0.15940626008389802, "grad_norm": 2.04909203123379, "learning_rate": 9.676389800723218e-07, "loss": 0.6685, "step": 988 }, { "epoch": 0.159567602452404, "grad_norm": 2.384426083398767, "learning_rate": 9.675444965875703e-07, "loss": 0.65, "step": 989 }, { "epoch": 0.15972894482090996, "grad_norm": 1.7474527894547798, "learning_rate": 9.674498800000527e-07, "loss": 0.5684, "step": 990 }, { "epoch": 0.15989028718941595, "grad_norm": 1.8287858991039254, "learning_rate": 9.673551303367042e-07, "loss": 0.6524, "step": 991 }, { "epoch": 0.1600516295579219, "grad_norm": 1.9307921795938163, "learning_rate": 9.672602476244993e-07, "loss": 0.6954, "step": 992 }, { "epoch": 0.1602129719264279, "grad_norm": 1.236862862456583, "learning_rate": 9.671652318904495e-07, "loss": 0.4487, "step": 993 }, { "epoch": 0.16037431429493385, "grad_norm": 2.8174328249189737, "learning_rate": 9.670700831616042e-07, "loss": 0.5587, "step": 994 }, { "epoch": 0.16053565666343983, "grad_norm": 2.101284760341389, "learning_rate": 9.669748014650505e-07, "loss": 0.6078, "step": 995 }, { "epoch": 0.1606969990319458, "grad_norm": 1.9237103886086186, "learning_rate": 9.668793868279141e-07, "loss": 0.5959, "step": 996 }, { "epoch": 0.16085834140045177, "grad_norm": 1.2504902355174352, "learning_rate": 9.667838392773581e-07, "loss": 0.4676, "step": 997 }, { "epoch": 0.16101968376895773, "grad_norm": 1.5527071200683855, "learning_rate": 9.66688158840583e-07, "loss": 0.5817, "step": 998 }, { "epoch": 0.16118102613746368, "grad_norm": 1.8464590956585818, "learning_rate": 9.66592345544828e-07, "loss": 0.4376, "step": 999 }, { "epoch": 0.16134236850596967, "grad_norm": 1.999708631272404, "learning_rate": 9.664963994173693e-07, "loss": 0.6042, "step": 1000 }, { "epoch": 0.16150371087447563, "grad_norm": 1.5524774686741727, "learning_rate": 9.664003204855217e-07, "loss": 0.5382, "step": 1001 }, { "epoch": 0.1616650532429816, "grad_norm": 2.027417131981177, "learning_rate": 9.663041087766371e-07, "loss": 0.7312, "step": 1002 }, { "epoch": 0.16182639561148757, "grad_norm": 1.9547796335927008, "learning_rate": 9.662077643181055e-07, "loss": 0.6294, "step": 1003 }, { "epoch": 0.16198773797999355, "grad_norm": 1.7777667202539416, "learning_rate": 9.66111287137355e-07, "loss": 0.6102, "step": 1004 }, { "epoch": 0.1621490803484995, "grad_norm": 1.2929289197981422, "learning_rate": 9.660146772618506e-07, "loss": 0.3918, "step": 1005 }, { "epoch": 0.1623104227170055, "grad_norm": 2.0770071364506535, "learning_rate": 9.659179347190962e-07, "loss": 0.732, "step": 1006 }, { "epoch": 0.16247176508551145, "grad_norm": 1.8690293935686078, "learning_rate": 9.658210595366328e-07, "loss": 0.6572, "step": 1007 }, { "epoch": 0.16263310745401743, "grad_norm": 2.306489608433616, "learning_rate": 9.65724051742039e-07, "loss": 0.6304, "step": 1008 }, { "epoch": 0.1627944498225234, "grad_norm": 1.7408288011116035, "learning_rate": 9.656269113629318e-07, "loss": 0.5935, "step": 1009 }, { "epoch": 0.16295579219102937, "grad_norm": 2.6354536172038046, "learning_rate": 9.655296384269656e-07, "loss": 0.7345, "step": 1010 }, { "epoch": 0.16311713455953533, "grad_norm": 1.544084131320553, "learning_rate": 9.65432232961832e-07, "loss": 0.4734, "step": 1011 }, { "epoch": 0.16327847692804132, "grad_norm": 2.101221900765677, "learning_rate": 9.653346949952613e-07, "loss": 0.598, "step": 1012 }, { "epoch": 0.16343981929654727, "grad_norm": 1.5895659932815216, "learning_rate": 9.652370245550212e-07, "loss": 0.5599, "step": 1013 }, { "epoch": 0.16360116166505323, "grad_norm": 1.5627455709123377, "learning_rate": 9.651392216689165e-07, "loss": 0.4179, "step": 1014 }, { "epoch": 0.16376250403355921, "grad_norm": 1.726342303703844, "learning_rate": 9.650412863647905e-07, "loss": 0.6279, "step": 1015 }, { "epoch": 0.16392384640206517, "grad_norm": 1.6903392077758714, "learning_rate": 9.649432186705237e-07, "loss": 0.544, "step": 1016 }, { "epoch": 0.16408518877057116, "grad_norm": 1.5007234259392102, "learning_rate": 9.648450186140348e-07, "loss": 0.6724, "step": 1017 }, { "epoch": 0.1642465311390771, "grad_norm": 1.731472576720621, "learning_rate": 9.647466862232796e-07, "loss": 0.7435, "step": 1018 }, { "epoch": 0.1644078735075831, "grad_norm": 1.5249091449626186, "learning_rate": 9.646482215262518e-07, "loss": 0.6091, "step": 1019 }, { "epoch": 0.16456921587608905, "grad_norm": 1.5550233104467324, "learning_rate": 9.645496245509827e-07, "loss": 0.6167, "step": 1020 }, { "epoch": 0.16473055824459504, "grad_norm": 1.885877654762445, "learning_rate": 9.644508953255418e-07, "loss": 0.5696, "step": 1021 }, { "epoch": 0.164891900613101, "grad_norm": 2.1085786799838933, "learning_rate": 9.643520338780354e-07, "loss": 0.8153, "step": 1022 }, { "epoch": 0.16505324298160698, "grad_norm": 2.0929945536550347, "learning_rate": 9.642530402366078e-07, "loss": 0.6134, "step": 1023 }, { "epoch": 0.16521458535011294, "grad_norm": 1.6076654038829987, "learning_rate": 9.641539144294412e-07, "loss": 0.4976, "step": 1024 }, { "epoch": 0.16537592771861892, "grad_norm": 1.815875494743086, "learning_rate": 9.640546564847551e-07, "loss": 0.5947, "step": 1025 }, { "epoch": 0.16553727008712488, "grad_norm": 2.2584239303415568, "learning_rate": 9.639552664308068e-07, "loss": 0.6235, "step": 1026 }, { "epoch": 0.16569861245563086, "grad_norm": 1.6797506631020125, "learning_rate": 9.63855744295891e-07, "loss": 0.7314, "step": 1027 }, { "epoch": 0.16585995482413682, "grad_norm": 1.5634421750444247, "learning_rate": 9.637560901083403e-07, "loss": 0.4325, "step": 1028 }, { "epoch": 0.16602129719264277, "grad_norm": 1.6585958889602905, "learning_rate": 9.636563038965246e-07, "loss": 0.6509, "step": 1029 }, { "epoch": 0.16618263956114876, "grad_norm": 2.269624219030889, "learning_rate": 9.635563856888515e-07, "loss": 0.642, "step": 1030 }, { "epoch": 0.16634398192965472, "grad_norm": 1.708467416230991, "learning_rate": 9.634563355137664e-07, "loss": 0.5115, "step": 1031 }, { "epoch": 0.1665053242981607, "grad_norm": 2.024310538574972, "learning_rate": 9.633561533997515e-07, "loss": 0.7031, "step": 1032 }, { "epoch": 0.16666666666666666, "grad_norm": 1.57388786833099, "learning_rate": 9.632558393753279e-07, "loss": 0.4803, "step": 1033 }, { "epoch": 0.16682800903517264, "grad_norm": 1.65105284998944, "learning_rate": 9.631553934690528e-07, "loss": 0.6446, "step": 1034 }, { "epoch": 0.1669893514036786, "grad_norm": 1.9862754312163167, "learning_rate": 9.63054815709522e-07, "loss": 0.7771, "step": 1035 }, { "epoch": 0.16715069377218458, "grad_norm": 1.646386411745923, "learning_rate": 9.629541061253682e-07, "loss": 0.6411, "step": 1036 }, { "epoch": 0.16731203614069054, "grad_norm": 1.3931272047529406, "learning_rate": 9.62853264745262e-07, "loss": 0.523, "step": 1037 }, { "epoch": 0.16747337850919652, "grad_norm": 2.1506877420192536, "learning_rate": 9.627522915979114e-07, "loss": 0.5768, "step": 1038 }, { "epoch": 0.16763472087770248, "grad_norm": 1.7034353970794769, "learning_rate": 9.62651186712062e-07, "loss": 0.6468, "step": 1039 }, { "epoch": 0.16779606324620847, "grad_norm": 1.9640826652905856, "learning_rate": 9.625499501164967e-07, "loss": 0.6916, "step": 1040 }, { "epoch": 0.16795740561471442, "grad_norm": 1.856987646711979, "learning_rate": 9.624485818400359e-07, "loss": 0.5684, "step": 1041 }, { "epoch": 0.1681187479832204, "grad_norm": 1.9770627199256723, "learning_rate": 9.623470819115378e-07, "loss": 0.6272, "step": 1042 }, { "epoch": 0.16828009035172636, "grad_norm": 1.6680370022033697, "learning_rate": 9.622454503598977e-07, "loss": 0.5959, "step": 1043 }, { "epoch": 0.16844143272023232, "grad_norm": 1.7501919504888337, "learning_rate": 9.621436872140489e-07, "loss": 0.7704, "step": 1044 }, { "epoch": 0.1686027750887383, "grad_norm": 1.494558876687573, "learning_rate": 9.620417925029614e-07, "loss": 0.6229, "step": 1045 }, { "epoch": 0.16876411745724426, "grad_norm": 1.8592144552210566, "learning_rate": 9.619397662556433e-07, "loss": 0.5273, "step": 1046 }, { "epoch": 0.16892545982575025, "grad_norm": 1.7406906060776672, "learning_rate": 9.6183760850114e-07, "loss": 0.4133, "step": 1047 }, { "epoch": 0.1690868021942562, "grad_norm": 1.485486788741986, "learning_rate": 9.617353192685337e-07, "loss": 0.4766, "step": 1048 }, { "epoch": 0.1692481445627622, "grad_norm": 2.61274410366787, "learning_rate": 9.616328985869453e-07, "loss": 0.4602, "step": 1049 }, { "epoch": 0.16940948693126814, "grad_norm": 2.0262514085900074, "learning_rate": 9.61530346485532e-07, "loss": 0.6083, "step": 1050 }, { "epoch": 0.16957082929977413, "grad_norm": 1.605197558737143, "learning_rate": 9.614276629934887e-07, "loss": 0.7215, "step": 1051 }, { "epoch": 0.16973217166828009, "grad_norm": 2.266773643026198, "learning_rate": 9.613248481400482e-07, "loss": 0.5717, "step": 1052 }, { "epoch": 0.16989351403678607, "grad_norm": 1.6297084379085518, "learning_rate": 9.6122190195448e-07, "loss": 0.5964, "step": 1053 }, { "epoch": 0.17005485640529203, "grad_norm": 1.618782546846617, "learning_rate": 9.611188244660914e-07, "loss": 0.475, "step": 1054 }, { "epoch": 0.170216198773798, "grad_norm": 1.6655520367700076, "learning_rate": 9.610156157042272e-07, "loss": 0.4695, "step": 1055 }, { "epoch": 0.17037754114230397, "grad_norm": 1.4789757762109061, "learning_rate": 9.609122756982691e-07, "loss": 0.6233, "step": 1056 }, { "epoch": 0.17053888351080995, "grad_norm": 1.8118923747245141, "learning_rate": 9.608088044776365e-07, "loss": 0.6266, "step": 1057 }, { "epoch": 0.1707002258793159, "grad_norm": 1.5051072118598412, "learning_rate": 9.60705202071786e-07, "loss": 0.5591, "step": 1058 }, { "epoch": 0.17086156824782187, "grad_norm": 2.8690081644350918, "learning_rate": 9.606014685102116e-07, "loss": 0.5698, "step": 1059 }, { "epoch": 0.17102291061632785, "grad_norm": 1.270194200400938, "learning_rate": 9.60497603822445e-07, "loss": 0.5941, "step": 1060 }, { "epoch": 0.1711842529848338, "grad_norm": 2.159836371193334, "learning_rate": 9.603936080380543e-07, "loss": 0.8527, "step": 1061 }, { "epoch": 0.1713455953533398, "grad_norm": 1.8832426727326579, "learning_rate": 9.60289481186646e-07, "loss": 0.5033, "step": 1062 }, { "epoch": 0.17150693772184575, "grad_norm": 1.602026300031061, "learning_rate": 9.601852232978634e-07, "loss": 0.4658, "step": 1063 }, { "epoch": 0.17166828009035173, "grad_norm": 1.9293950897940428, "learning_rate": 9.600808344013867e-07, "loss": 0.6712, "step": 1064 }, { "epoch": 0.1718296224588577, "grad_norm": 2.2335110574674455, "learning_rate": 9.599763145269343e-07, "loss": 0.7268, "step": 1065 }, { "epoch": 0.17199096482736367, "grad_norm": 1.6313121418425491, "learning_rate": 9.598716637042612e-07, "loss": 0.4498, "step": 1066 }, { "epoch": 0.17215230719586963, "grad_norm": 1.7239110965154105, "learning_rate": 9.597668819631598e-07, "loss": 0.6611, "step": 1067 }, { "epoch": 0.17231364956437561, "grad_norm": 2.2310595690247963, "learning_rate": 9.5966196933346e-07, "loss": 0.8266, "step": 1068 }, { "epoch": 0.17247499193288157, "grad_norm": 1.8762619857862708, "learning_rate": 9.595569258450289e-07, "loss": 0.4489, "step": 1069 }, { "epoch": 0.17263633430138756, "grad_norm": 2.249404086565912, "learning_rate": 9.594517515277704e-07, "loss": 0.7215, "step": 1070 }, { "epoch": 0.1727976766698935, "grad_norm": 1.8872975474533131, "learning_rate": 9.593464464116265e-07, "loss": 0.6944, "step": 1071 }, { "epoch": 0.1729590190383995, "grad_norm": 1.366924587583477, "learning_rate": 9.592410105265758e-07, "loss": 0.4007, "step": 1072 }, { "epoch": 0.17312036140690545, "grad_norm": 1.8157577833775402, "learning_rate": 9.59135443902634e-07, "loss": 0.6691, "step": 1073 }, { "epoch": 0.1732817037754114, "grad_norm": 1.7576239590726874, "learning_rate": 9.590297465698545e-07, "loss": 0.6305, "step": 1074 }, { "epoch": 0.1734430461439174, "grad_norm": 1.7421790836969144, "learning_rate": 9.589239185583279e-07, "loss": 0.457, "step": 1075 }, { "epoch": 0.17360438851242335, "grad_norm": 1.838945108587051, "learning_rate": 9.588179598981815e-07, "loss": 0.5387, "step": 1076 }, { "epoch": 0.17376573088092934, "grad_norm": 1.274803505510215, "learning_rate": 9.587118706195806e-07, "loss": 0.5021, "step": 1077 }, { "epoch": 0.1739270732494353, "grad_norm": 2.3735855307507987, "learning_rate": 9.586056507527264e-07, "loss": 0.6931, "step": 1078 }, { "epoch": 0.17408841561794128, "grad_norm": 1.6050969271047157, "learning_rate": 9.584993003278587e-07, "loss": 0.5587, "step": 1079 }, { "epoch": 0.17424975798644723, "grad_norm": 2.1518000142651736, "learning_rate": 9.583928193752537e-07, "loss": 0.6595, "step": 1080 }, { "epoch": 0.17441110035495322, "grad_norm": 1.3942011594790227, "learning_rate": 9.58286207925225e-07, "loss": 0.3516, "step": 1081 }, { "epoch": 0.17457244272345918, "grad_norm": 1.8646851055423328, "learning_rate": 9.581794660081229e-07, "loss": 0.4157, "step": 1082 }, { "epoch": 0.17473378509196516, "grad_norm": 1.6220245263001012, "learning_rate": 9.580725936543355e-07, "loss": 0.7305, "step": 1083 }, { "epoch": 0.17489512746047112, "grad_norm": 1.8644927302960728, "learning_rate": 9.579655908942877e-07, "loss": 0.6043, "step": 1084 }, { "epoch": 0.1750564698289771, "grad_norm": 1.5921286769554037, "learning_rate": 9.578584577584413e-07, "loss": 0.6677, "step": 1085 }, { "epoch": 0.17521781219748306, "grad_norm": 2.0363527028298085, "learning_rate": 9.577511942772957e-07, "loss": 0.4696, "step": 1086 }, { "epoch": 0.17537915456598901, "grad_norm": 1.895859085858281, "learning_rate": 9.57643800481387e-07, "loss": 0.5998, "step": 1087 }, { "epoch": 0.175540496934495, "grad_norm": 1.5235177043440713, "learning_rate": 9.575362764012889e-07, "loss": 0.5412, "step": 1088 }, { "epoch": 0.17570183930300096, "grad_norm": 1.8857492044753899, "learning_rate": 9.574286220676116e-07, "loss": 0.6051, "step": 1089 }, { "epoch": 0.17586318167150694, "grad_norm": 2.300440298312711, "learning_rate": 9.573208375110025e-07, "loss": 0.8524, "step": 1090 }, { "epoch": 0.1760245240400129, "grad_norm": 4.769782982134017, "learning_rate": 9.572129227621467e-07, "loss": 0.4625, "step": 1091 }, { "epoch": 0.17618586640851888, "grad_norm": 1.846893088418387, "learning_rate": 9.571048778517652e-07, "loss": 0.603, "step": 1092 }, { "epoch": 0.17634720877702484, "grad_norm": 1.5902779850781494, "learning_rate": 9.569967028106173e-07, "loss": 0.4278, "step": 1093 }, { "epoch": 0.17650855114553082, "grad_norm": 2.0396610711479126, "learning_rate": 9.568883976694987e-07, "loss": 0.75, "step": 1094 }, { "epoch": 0.17666989351403678, "grad_norm": 2.0691697907728295, "learning_rate": 9.56779962459242e-07, "loss": 0.6291, "step": 1095 }, { "epoch": 0.17683123588254276, "grad_norm": 2.2981374994746724, "learning_rate": 9.566713972107172e-07, "loss": 0.6463, "step": 1096 }, { "epoch": 0.17699257825104872, "grad_norm": 1.5996822578119232, "learning_rate": 9.565627019548312e-07, "loss": 0.4884, "step": 1097 }, { "epoch": 0.1771539206195547, "grad_norm": 1.7372007290644722, "learning_rate": 9.564538767225276e-07, "loss": 0.5981, "step": 1098 }, { "epoch": 0.17731526298806066, "grad_norm": 1.6849645711643617, "learning_rate": 9.563449215447877e-07, "loss": 0.5017, "step": 1099 }, { "epoch": 0.17747660535656665, "grad_norm": 2.4147146387304286, "learning_rate": 9.56235836452629e-07, "loss": 0.7645, "step": 1100 }, { "epoch": 0.1776379477250726, "grad_norm": 3.5924809080929307, "learning_rate": 9.561266214771065e-07, "loss": 0.7895, "step": 1101 }, { "epoch": 0.17779929009357856, "grad_norm": 1.4021613977398928, "learning_rate": 9.560172766493122e-07, "loss": 0.6543, "step": 1102 }, { "epoch": 0.17796063246208454, "grad_norm": 1.4558146206563516, "learning_rate": 9.559078020003746e-07, "loss": 0.4563, "step": 1103 }, { "epoch": 0.1781219748305905, "grad_norm": 2.0146213368799515, "learning_rate": 9.557981975614595e-07, "loss": 0.6652, "step": 1104 }, { "epoch": 0.17828331719909649, "grad_norm": 1.4536921717129412, "learning_rate": 9.5568846336377e-07, "loss": 0.8029, "step": 1105 }, { "epoch": 0.17844465956760244, "grad_norm": 1.4202274376773056, "learning_rate": 9.55578599438545e-07, "loss": 0.6321, "step": 1106 }, { "epoch": 0.17860600193610843, "grad_norm": 1.9482436560144842, "learning_rate": 9.554686058170619e-07, "loss": 0.5432, "step": 1107 }, { "epoch": 0.17876734430461438, "grad_norm": 1.7589795116990148, "learning_rate": 9.553584825306337e-07, "loss": 0.5123, "step": 1108 }, { "epoch": 0.17892868667312037, "grad_norm": 1.918807893059842, "learning_rate": 9.55248229610611e-07, "loss": 0.6819, "step": 1109 }, { "epoch": 0.17909002904162633, "grad_norm": 2.1979831727679344, "learning_rate": 9.55137847088381e-07, "loss": 0.4388, "step": 1110 }, { "epoch": 0.1792513714101323, "grad_norm": 1.6212196393137126, "learning_rate": 9.550273349953681e-07, "loss": 0.6917, "step": 1111 }, { "epoch": 0.17941271377863827, "grad_norm": 1.3530574755305975, "learning_rate": 9.54916693363033e-07, "loss": 0.4931, "step": 1112 }, { "epoch": 0.17957405614714425, "grad_norm": 1.295568750524931, "learning_rate": 9.548059222228743e-07, "loss": 0.5559, "step": 1113 }, { "epoch": 0.1797353985156502, "grad_norm": 1.8511621871408177, "learning_rate": 9.546950216064264e-07, "loss": 0.6143, "step": 1114 }, { "epoch": 0.1798967408841562, "grad_norm": 1.774316712618307, "learning_rate": 9.545839915452611e-07, "loss": 0.5959, "step": 1115 }, { "epoch": 0.18005808325266215, "grad_norm": 2.2772730821274543, "learning_rate": 9.544728320709871e-07, "loss": 0.7542, "step": 1116 }, { "epoch": 0.1802194256211681, "grad_norm": 1.8193133317461327, "learning_rate": 9.543615432152496e-07, "loss": 0.5877, "step": 1117 }, { "epoch": 0.1803807679896741, "grad_norm": 2.2344185351418715, "learning_rate": 9.542501250097309e-07, "loss": 0.9055, "step": 1118 }, { "epoch": 0.18054211035818005, "grad_norm": 2.0546866877449084, "learning_rate": 9.5413857748615e-07, "loss": 0.5485, "step": 1119 }, { "epoch": 0.18070345272668603, "grad_norm": 1.736300250452928, "learning_rate": 9.54026900676263e-07, "loss": 0.5275, "step": 1120 }, { "epoch": 0.180864795095192, "grad_norm": 2.431145528004982, "learning_rate": 9.539150946118623e-07, "loss": 0.8564, "step": 1121 }, { "epoch": 0.18102613746369797, "grad_norm": 1.8797706312916158, "learning_rate": 9.538031593247774e-07, "loss": 0.6085, "step": 1122 }, { "epoch": 0.18118747983220393, "grad_norm": 1.5449633104480163, "learning_rate": 9.536910948468751e-07, "loss": 0.6704, "step": 1123 }, { "epoch": 0.1813488222007099, "grad_norm": 1.6462006058520455, "learning_rate": 9.535789012100577e-07, "loss": 0.5583, "step": 1124 }, { "epoch": 0.18151016456921587, "grad_norm": 1.7342707284736438, "learning_rate": 9.534665784462654e-07, "loss": 0.463, "step": 1125 }, { "epoch": 0.18167150693772185, "grad_norm": 2.1423141608992036, "learning_rate": 9.533541265874747e-07, "loss": 0.7835, "step": 1126 }, { "epoch": 0.1818328493062278, "grad_norm": 1.383277771688044, "learning_rate": 9.532415456656988e-07, "loss": 0.3045, "step": 1127 }, { "epoch": 0.1819941916747338, "grad_norm": 1.9219770947142427, "learning_rate": 9.531288357129881e-07, "loss": 0.6629, "step": 1128 }, { "epoch": 0.18215553404323975, "grad_norm": 1.9451702632762395, "learning_rate": 9.530159967614291e-07, "loss": 0.6435, "step": 1129 }, { "epoch": 0.18231687641174574, "grad_norm": 1.5547956112791106, "learning_rate": 9.529030288431455e-07, "loss": 0.392, "step": 1130 }, { "epoch": 0.1824782187802517, "grad_norm": 1.7778578465365151, "learning_rate": 9.527899319902973e-07, "loss": 0.6968, "step": 1131 }, { "epoch": 0.18263956114875765, "grad_norm": 1.8198350271974104, "learning_rate": 9.526767062350816e-07, "loss": 0.8102, "step": 1132 }, { "epoch": 0.18280090351726364, "grad_norm": 2.1385757773073286, "learning_rate": 9.525633516097321e-07, "loss": 0.7498, "step": 1133 }, { "epoch": 0.1829622458857696, "grad_norm": 1.76694567414532, "learning_rate": 9.52449868146519e-07, "loss": 0.481, "step": 1134 }, { "epoch": 0.18312358825427558, "grad_norm": 1.4669064253572532, "learning_rate": 9.523362558777494e-07, "loss": 0.7737, "step": 1135 }, { "epoch": 0.18328493062278153, "grad_norm": 3.1006522200100295, "learning_rate": 9.52222514835767e-07, "loss": 0.6052, "step": 1136 }, { "epoch": 0.18344627299128752, "grad_norm": 1.5752049993889994, "learning_rate": 9.52108645052952e-07, "loss": 0.592, "step": 1137 }, { "epoch": 0.18360761535979347, "grad_norm": 1.791534115819155, "learning_rate": 9.519946465617217e-07, "loss": 0.5552, "step": 1138 }, { "epoch": 0.18376895772829946, "grad_norm": 1.5505143327441129, "learning_rate": 9.518805193945292e-07, "loss": 0.3852, "step": 1139 }, { "epoch": 0.18393030009680542, "grad_norm": 1.578965973326175, "learning_rate": 9.517662635838653e-07, "loss": 0.6927, "step": 1140 }, { "epoch": 0.1840916424653114, "grad_norm": 2.2295777366690084, "learning_rate": 9.516518791622563e-07, "loss": 0.7955, "step": 1141 }, { "epoch": 0.18425298483381736, "grad_norm": 1.738811489199952, "learning_rate": 9.515373661622663e-07, "loss": 0.6585, "step": 1142 }, { "epoch": 0.18441432720232334, "grad_norm": 1.895961701235679, "learning_rate": 9.514227246164951e-07, "loss": 0.6205, "step": 1143 }, { "epoch": 0.1845756695708293, "grad_norm": 1.5410055684775632, "learning_rate": 9.513079545575793e-07, "loss": 0.5189, "step": 1144 }, { "epoch": 0.18473701193933528, "grad_norm": 1.5194007013329096, "learning_rate": 9.511930560181925e-07, "loss": 0.655, "step": 1145 }, { "epoch": 0.18489835430784124, "grad_norm": 1.4449180270904596, "learning_rate": 9.510780290310441e-07, "loss": 0.5608, "step": 1146 }, { "epoch": 0.1850596966763472, "grad_norm": 1.7958147984703972, "learning_rate": 9.50962873628881e-07, "loss": 0.7433, "step": 1147 }, { "epoch": 0.18522103904485318, "grad_norm": 1.3646050177251419, "learning_rate": 9.508475898444858e-07, "loss": 0.4668, "step": 1148 }, { "epoch": 0.18538238141335914, "grad_norm": 1.4138094006377901, "learning_rate": 9.507321777106782e-07, "loss": 0.6575, "step": 1149 }, { "epoch": 0.18554372378186512, "grad_norm": 1.5242033769466772, "learning_rate": 9.506166372603143e-07, "loss": 0.4347, "step": 1150 }, { "epoch": 0.18570506615037108, "grad_norm": 1.542137907365175, "learning_rate": 9.505009685262868e-07, "loss": 0.4168, "step": 1151 }, { "epoch": 0.18586640851887706, "grad_norm": 2.7600868939805134, "learning_rate": 9.503851715415245e-07, "loss": 0.7537, "step": 1152 }, { "epoch": 0.18602775088738302, "grad_norm": 1.8502848277046517, "learning_rate": 9.502692463389935e-07, "loss": 0.5684, "step": 1153 }, { "epoch": 0.186189093255889, "grad_norm": 1.5436250087779122, "learning_rate": 9.501531929516953e-07, "loss": 0.3976, "step": 1154 }, { "epoch": 0.18635043562439496, "grad_norm": 2.223726413852606, "learning_rate": 9.50037011412669e-07, "loss": 0.6188, "step": 1155 }, { "epoch": 0.18651177799290095, "grad_norm": 2.2062940598537955, "learning_rate": 9.499207017549895e-07, "loss": 0.4315, "step": 1156 }, { "epoch": 0.1866731203614069, "grad_norm": 1.4401524163752104, "learning_rate": 9.498042640117686e-07, "loss": 0.6427, "step": 1157 }, { "epoch": 0.1868344627299129, "grad_norm": 1.8143205378680027, "learning_rate": 9.496876982161542e-07, "loss": 0.4671, "step": 1158 }, { "epoch": 0.18699580509841884, "grad_norm": 1.5660161700382174, "learning_rate": 9.495710044013306e-07, "loss": 0.5697, "step": 1159 }, { "epoch": 0.18715714746692483, "grad_norm": 1.6083962139955033, "learning_rate": 9.494541826005191e-07, "loss": 0.481, "step": 1160 }, { "epoch": 0.18731848983543078, "grad_norm": 1.6701120368700544, "learning_rate": 9.493372328469769e-07, "loss": 0.5843, "step": 1161 }, { "epoch": 0.18747983220393674, "grad_norm": 1.6445685085688042, "learning_rate": 9.492201551739978e-07, "loss": 0.6202, "step": 1162 }, { "epoch": 0.18764117457244273, "grad_norm": 1.9027921864814068, "learning_rate": 9.491029496149121e-07, "loss": 0.452, "step": 1163 }, { "epoch": 0.18780251694094868, "grad_norm": 1.2719982884143615, "learning_rate": 9.489856162030863e-07, "loss": 0.4895, "step": 1164 }, { "epoch": 0.18796385930945467, "grad_norm": 1.885190862408239, "learning_rate": 9.488681549719235e-07, "loss": 0.7929, "step": 1165 }, { "epoch": 0.18812520167796062, "grad_norm": 2.0121252149292794, "learning_rate": 9.487505659548632e-07, "loss": 0.585, "step": 1166 }, { "epoch": 0.1882865440464666, "grad_norm": 1.9179085840550358, "learning_rate": 9.486328491853812e-07, "loss": 0.5998, "step": 1167 }, { "epoch": 0.18844788641497257, "grad_norm": 1.4808889311122875, "learning_rate": 9.485150046969894e-07, "loss": 0.499, "step": 1168 }, { "epoch": 0.18860922878347855, "grad_norm": 1.674414159324694, "learning_rate": 9.483970325232365e-07, "loss": 0.495, "step": 1169 }, { "epoch": 0.1887705711519845, "grad_norm": 2.221203159746464, "learning_rate": 9.482789326977075e-07, "loss": 0.6435, "step": 1170 }, { "epoch": 0.1889319135204905, "grad_norm": 1.3697801693432599, "learning_rate": 9.481607052540234e-07, "loss": 0.5367, "step": 1171 }, { "epoch": 0.18909325588899645, "grad_norm": 1.6126504546952916, "learning_rate": 9.480423502258419e-07, "loss": 0.353, "step": 1172 }, { "epoch": 0.18925459825750243, "grad_norm": 1.6063590817946984, "learning_rate": 9.479238676468569e-07, "loss": 0.5288, "step": 1173 }, { "epoch": 0.1894159406260084, "grad_norm": 2.0453837546226663, "learning_rate": 9.478052575507982e-07, "loss": 0.5776, "step": 1174 }, { "epoch": 0.18957728299451435, "grad_norm": 1.5359287793366925, "learning_rate": 9.476865199714328e-07, "loss": 0.7118, "step": 1175 }, { "epoch": 0.18973862536302033, "grad_norm": 2.144318450208775, "learning_rate": 9.475676549425631e-07, "loss": 0.7257, "step": 1176 }, { "epoch": 0.1898999677315263, "grad_norm": 1.7331451316899604, "learning_rate": 9.474486624980284e-07, "loss": 0.4874, "step": 1177 }, { "epoch": 0.19006131010003227, "grad_norm": 1.52255340745581, "learning_rate": 9.473295426717038e-07, "loss": 0.4013, "step": 1178 }, { "epoch": 0.19022265246853823, "grad_norm": 1.5732940153823411, "learning_rate": 9.472102954975012e-07, "loss": 0.4364, "step": 1179 }, { "epoch": 0.1903839948370442, "grad_norm": 1.5184799519390824, "learning_rate": 9.470909210093681e-07, "loss": 0.42, "step": 1180 }, { "epoch": 0.19054533720555017, "grad_norm": 1.8566384573710522, "learning_rate": 9.469714192412887e-07, "loss": 0.63, "step": 1181 }, { "epoch": 0.19070667957405615, "grad_norm": 1.768951657934476, "learning_rate": 9.468517902272834e-07, "loss": 0.4441, "step": 1182 }, { "epoch": 0.1908680219425621, "grad_norm": 2.067374970462217, "learning_rate": 9.467320340014089e-07, "loss": 0.461, "step": 1183 }, { "epoch": 0.1910293643110681, "grad_norm": 2.4621886932800088, "learning_rate": 9.466121505977576e-07, "loss": 0.8798, "step": 1184 }, { "epoch": 0.19119070667957405, "grad_norm": 2.379573233673228, "learning_rate": 9.464921400504587e-07, "loss": 0.5972, "step": 1185 }, { "epoch": 0.19135204904808004, "grad_norm": 1.767353933037065, "learning_rate": 9.463720023936774e-07, "loss": 0.6826, "step": 1186 }, { "epoch": 0.191513391416586, "grad_norm": 1.9337577364963463, "learning_rate": 9.462517376616149e-07, "loss": 0.7432, "step": 1187 }, { "epoch": 0.19167473378509198, "grad_norm": 1.802596495078478, "learning_rate": 9.461313458885089e-07, "loss": 0.6695, "step": 1188 }, { "epoch": 0.19183607615359793, "grad_norm": 1.4378352396277216, "learning_rate": 9.460108271086328e-07, "loss": 0.4946, "step": 1189 }, { "epoch": 0.1919974185221039, "grad_norm": 1.6408519043007916, "learning_rate": 9.45890181356297e-07, "loss": 0.4539, "step": 1190 }, { "epoch": 0.19215876089060988, "grad_norm": 1.496170161514532, "learning_rate": 9.457694086658468e-07, "loss": 0.5903, "step": 1191 }, { "epoch": 0.19232010325911583, "grad_norm": 1.7111322997516638, "learning_rate": 9.456485090716648e-07, "loss": 0.457, "step": 1192 }, { "epoch": 0.19248144562762182, "grad_norm": 1.9133974126503837, "learning_rate": 9.455274826081693e-07, "loss": 0.5867, "step": 1193 }, { "epoch": 0.19264278799612777, "grad_norm": 1.4501938328590935, "learning_rate": 9.454063293098144e-07, "loss": 0.5761, "step": 1194 }, { "epoch": 0.19280413036463376, "grad_norm": 1.7844699734133365, "learning_rate": 9.452850492110908e-07, "loss": 0.3939, "step": 1195 }, { "epoch": 0.19296547273313971, "grad_norm": 1.950967802844736, "learning_rate": 9.45163642346525e-07, "loss": 0.6498, "step": 1196 }, { "epoch": 0.1931268151016457, "grad_norm": 1.4616416009619033, "learning_rate": 9.450421087506798e-07, "loss": 0.3005, "step": 1197 }, { "epoch": 0.19328815747015166, "grad_norm": 2.1931266355467662, "learning_rate": 9.449204484581538e-07, "loss": 0.6735, "step": 1198 }, { "epoch": 0.19344949983865764, "grad_norm": 1.830708235453242, "learning_rate": 9.447986615035819e-07, "loss": 0.476, "step": 1199 }, { "epoch": 0.1936108422071636, "grad_norm": 2.2102449768099075, "learning_rate": 9.446767479216349e-07, "loss": 0.5543, "step": 1200 }, { "epoch": 0.19377218457566958, "grad_norm": 1.6678373119730818, "learning_rate": 9.445547077470197e-07, "loss": 0.4515, "step": 1201 }, { "epoch": 0.19393352694417554, "grad_norm": 1.6440402858838, "learning_rate": 9.444325410144796e-07, "loss": 0.6915, "step": 1202 }, { "epoch": 0.19409486931268152, "grad_norm": 1.7076095893286793, "learning_rate": 9.443102477587932e-07, "loss": 0.64, "step": 1203 }, { "epoch": 0.19425621168118748, "grad_norm": 2.083215201525445, "learning_rate": 9.441878280147757e-07, "loss": 0.6375, "step": 1204 }, { "epoch": 0.19441755404969344, "grad_norm": 1.8050660235938965, "learning_rate": 9.440652818172782e-07, "loss": 0.6161, "step": 1205 }, { "epoch": 0.19457889641819942, "grad_norm": 1.8559472039375773, "learning_rate": 9.439426092011875e-07, "loss": 0.4855, "step": 1206 }, { "epoch": 0.19474023878670538, "grad_norm": 1.7050575700812156, "learning_rate": 9.43819810201427e-07, "loss": 0.5689, "step": 1207 }, { "epoch": 0.19490158115521136, "grad_norm": 1.7150124573324657, "learning_rate": 9.436968848529552e-07, "loss": 0.6193, "step": 1208 }, { "epoch": 0.19506292352371732, "grad_norm": 1.6859899758417651, "learning_rate": 9.435738331907675e-07, "loss": 0.4748, "step": 1209 }, { "epoch": 0.1952242658922233, "grad_norm": 1.4409188787960951, "learning_rate": 9.434506552498946e-07, "loss": 0.7318, "step": 1210 }, { "epoch": 0.19538560826072926, "grad_norm": 1.41171009966718, "learning_rate": 9.433273510654034e-07, "loss": 0.6479, "step": 1211 }, { "epoch": 0.19554695062923524, "grad_norm": 1.6402686322407942, "learning_rate": 9.432039206723967e-07, "loss": 0.5185, "step": 1212 }, { "epoch": 0.1957082929977412, "grad_norm": 1.8292677478479333, "learning_rate": 9.430803641060134e-07, "loss": 0.5143, "step": 1213 }, { "epoch": 0.19586963536624719, "grad_norm": 1.7996877134481635, "learning_rate": 9.429566814014281e-07, "loss": 0.5642, "step": 1214 }, { "epoch": 0.19603097773475314, "grad_norm": 1.8869245270846675, "learning_rate": 9.42832872593851e-07, "loss": 0.656, "step": 1215 }, { "epoch": 0.19619232010325913, "grad_norm": 1.5080729837691509, "learning_rate": 9.427089377185291e-07, "loss": 0.6286, "step": 1216 }, { "epoch": 0.19635366247176508, "grad_norm": 1.9858061668254907, "learning_rate": 9.425848768107448e-07, "loss": 0.5648, "step": 1217 }, { "epoch": 0.19651500484027107, "grad_norm": 2.0862517959468145, "learning_rate": 9.424606899058158e-07, "loss": 0.6894, "step": 1218 }, { "epoch": 0.19667634720877702, "grad_norm": 1.6298479975872775, "learning_rate": 9.423363770390965e-07, "loss": 0.5948, "step": 1219 }, { "epoch": 0.19683768957728298, "grad_norm": 1.7647954291643313, "learning_rate": 9.42211938245977e-07, "loss": 0.7197, "step": 1220 }, { "epoch": 0.19699903194578897, "grad_norm": 1.884666574672233, "learning_rate": 9.42087373561883e-07, "loss": 0.6748, "step": 1221 }, { "epoch": 0.19716037431429492, "grad_norm": 1.5416485897069758, "learning_rate": 9.419626830222761e-07, "loss": 0.4338, "step": 1222 }, { "epoch": 0.1973217166828009, "grad_norm": 1.8740457013642975, "learning_rate": 9.418378666626538e-07, "loss": 0.6122, "step": 1223 }, { "epoch": 0.19748305905130686, "grad_norm": 1.6443794524749826, "learning_rate": 9.417129245185497e-07, "loss": 0.7374, "step": 1224 }, { "epoch": 0.19764440141981285, "grad_norm": 1.8095012060763789, "learning_rate": 9.415878566255326e-07, "loss": 0.4975, "step": 1225 }, { "epoch": 0.1978057437883188, "grad_norm": 3.682536648051938, "learning_rate": 9.414626630192073e-07, "loss": 0.6756, "step": 1226 }, { "epoch": 0.1979670861568248, "grad_norm": 1.8030655743675437, "learning_rate": 9.41337343735215e-07, "loss": 0.547, "step": 1227 }, { "epoch": 0.19812842852533075, "grad_norm": 1.5575506022699845, "learning_rate": 9.412118988092317e-07, "loss": 0.7331, "step": 1228 }, { "epoch": 0.19828977089383673, "grad_norm": 1.6340233360641225, "learning_rate": 9.410863282769699e-07, "loss": 0.4717, "step": 1229 }, { "epoch": 0.1984511132623427, "grad_norm": 1.7670101707524155, "learning_rate": 9.409606321741774e-07, "loss": 0.4216, "step": 1230 }, { "epoch": 0.19861245563084867, "grad_norm": 1.859810016937962, "learning_rate": 9.408348105366384e-07, "loss": 0.4917, "step": 1231 }, { "epoch": 0.19877379799935463, "grad_norm": 1.639056518660142, "learning_rate": 9.407088634001718e-07, "loss": 0.5077, "step": 1232 }, { "epoch": 0.1989351403678606, "grad_norm": 1.8215373097613623, "learning_rate": 9.405827908006333e-07, "loss": 0.5211, "step": 1233 }, { "epoch": 0.19909648273636657, "grad_norm": 1.973926822407489, "learning_rate": 9.404565927739137e-07, "loss": 0.6067, "step": 1234 }, { "epoch": 0.19925782510487253, "grad_norm": 1.7880142005851465, "learning_rate": 9.403302693559394e-07, "loss": 0.5191, "step": 1235 }, { "epoch": 0.1994191674733785, "grad_norm": 1.2109366140054723, "learning_rate": 9.402038205826732e-07, "loss": 0.4964, "step": 1236 }, { "epoch": 0.19958050984188447, "grad_norm": 1.4273627719958366, "learning_rate": 9.400772464901128e-07, "loss": 0.5626, "step": 1237 }, { "epoch": 0.19974185221039045, "grad_norm": 1.8004021566215773, "learning_rate": 9.399505471142919e-07, "loss": 0.5244, "step": 1238 }, { "epoch": 0.1999031945788964, "grad_norm": 1.6607166735607037, "learning_rate": 9.398237224912801e-07, "loss": 0.4624, "step": 1239 }, { "epoch": 0.2000645369474024, "grad_norm": 1.2972814543463869, "learning_rate": 9.396967726571823e-07, "loss": 0.4653, "step": 1240 }, { "epoch": 0.20022587931590835, "grad_norm": 1.668086385040584, "learning_rate": 9.395696976481391e-07, "loss": 0.6647, "step": 1241 }, { "epoch": 0.20038722168441434, "grad_norm": 1.4828123199173362, "learning_rate": 9.394424975003269e-07, "loss": 0.5701, "step": 1242 }, { "epoch": 0.2005485640529203, "grad_norm": 1.8574850911996388, "learning_rate": 9.393151722499578e-07, "loss": 0.5859, "step": 1243 }, { "epoch": 0.20070990642142628, "grad_norm": 2.080542055071207, "learning_rate": 9.391877219332789e-07, "loss": 0.6632, "step": 1244 }, { "epoch": 0.20087124878993223, "grad_norm": 1.7800375844437866, "learning_rate": 9.390601465865737e-07, "loss": 0.562, "step": 1245 }, { "epoch": 0.20103259115843822, "grad_norm": 1.5107709885597014, "learning_rate": 9.38932446246161e-07, "loss": 0.6069, "step": 1246 }, { "epoch": 0.20119393352694417, "grad_norm": 1.6567830361623712, "learning_rate": 9.388046209483949e-07, "loss": 0.5281, "step": 1247 }, { "epoch": 0.20135527589545016, "grad_norm": 1.973553685674154, "learning_rate": 9.386766707296655e-07, "loss": 0.5399, "step": 1248 }, { "epoch": 0.20151661826395612, "grad_norm": 1.535648024514111, "learning_rate": 9.385485956263981e-07, "loss": 0.5439, "step": 1249 }, { "epoch": 0.20167796063246207, "grad_norm": 2.5609965682418214, "learning_rate": 9.38420395675054e-07, "loss": 0.723, "step": 1250 }, { "epoch": 0.20183930300096806, "grad_norm": 1.2121605388084873, "learning_rate": 9.382920709121293e-07, "loss": 0.4468, "step": 1251 }, { "epoch": 0.202000645369474, "grad_norm": 1.553976231720956, "learning_rate": 9.381636213741566e-07, "loss": 0.5783, "step": 1252 }, { "epoch": 0.20216198773798, "grad_norm": 1.7271498289601994, "learning_rate": 9.380350470977032e-07, "loss": 0.4589, "step": 1253 }, { "epoch": 0.20232333010648595, "grad_norm": 1.4951379975129573, "learning_rate": 9.379063481193725e-07, "loss": 0.7507, "step": 1254 }, { "epoch": 0.20248467247499194, "grad_norm": 1.9536624626713663, "learning_rate": 9.377775244758027e-07, "loss": 0.7131, "step": 1255 }, { "epoch": 0.2026460148434979, "grad_norm": 1.9760613692016435, "learning_rate": 9.376485762036683e-07, "loss": 0.6026, "step": 1256 }, { "epoch": 0.20280735721200388, "grad_norm": 1.4687991032610133, "learning_rate": 9.375195033396788e-07, "loss": 0.4913, "step": 1257 }, { "epoch": 0.20296869958050984, "grad_norm": 1.7716279061073643, "learning_rate": 9.373903059205792e-07, "loss": 0.671, "step": 1258 }, { "epoch": 0.20313004194901582, "grad_norm": 1.5063513757298082, "learning_rate": 9.372609839831501e-07, "loss": 0.5728, "step": 1259 }, { "epoch": 0.20329138431752178, "grad_norm": 1.535725805803351, "learning_rate": 9.371315375642075e-07, "loss": 0.6282, "step": 1260 }, { "epoch": 0.20345272668602776, "grad_norm": 1.518265537809968, "learning_rate": 9.370019667006026e-07, "loss": 0.5768, "step": 1261 }, { "epoch": 0.20361406905453372, "grad_norm": 2.0019376428517197, "learning_rate": 9.368722714292227e-07, "loss": 0.7635, "step": 1262 }, { "epoch": 0.2037754114230397, "grad_norm": 1.8756517549085514, "learning_rate": 9.367424517869895e-07, "loss": 0.5407, "step": 1263 }, { "epoch": 0.20393675379154566, "grad_norm": 1.352902007725027, "learning_rate": 9.36612507810861e-07, "loss": 0.5005, "step": 1264 }, { "epoch": 0.20409809616005162, "grad_norm": 1.55986074706428, "learning_rate": 9.364824395378303e-07, "loss": 0.616, "step": 1265 }, { "epoch": 0.2042594385285576, "grad_norm": 2.1507415069444313, "learning_rate": 9.363522470049256e-07, "loss": 0.4813, "step": 1266 }, { "epoch": 0.20442078089706356, "grad_norm": 2.21389884909263, "learning_rate": 9.36221930249211e-07, "loss": 0.8732, "step": 1267 }, { "epoch": 0.20458212326556954, "grad_norm": 2.116168237809005, "learning_rate": 9.360914893077856e-07, "loss": 0.6006, "step": 1268 }, { "epoch": 0.2047434656340755, "grad_norm": 1.398688139378503, "learning_rate": 9.359609242177838e-07, "loss": 0.4782, "step": 1269 }, { "epoch": 0.20490480800258148, "grad_norm": 2.1405964870363823, "learning_rate": 9.358302350163756e-07, "loss": 0.7595, "step": 1270 }, { "epoch": 0.20506615037108744, "grad_norm": 1.564180233665706, "learning_rate": 9.356994217407661e-07, "loss": 0.4634, "step": 1271 }, { "epoch": 0.20522749273959343, "grad_norm": 2.082765603592799, "learning_rate": 9.355684844281961e-07, "loss": 0.7026, "step": 1272 }, { "epoch": 0.20538883510809938, "grad_norm": 2.825121470177618, "learning_rate": 9.354374231159412e-07, "loss": 0.6921, "step": 1273 }, { "epoch": 0.20555017747660537, "grad_norm": 2.0324613040557824, "learning_rate": 9.353062378413127e-07, "loss": 0.4337, "step": 1274 }, { "epoch": 0.20571151984511132, "grad_norm": 1.9543714284543987, "learning_rate": 9.35174928641657e-07, "loss": 0.5739, "step": 1275 }, { "epoch": 0.2058728622136173, "grad_norm": 1.8655073834888896, "learning_rate": 9.350434955543557e-07, "loss": 0.3671, "step": 1276 }, { "epoch": 0.20603420458212326, "grad_norm": 1.7075820139098783, "learning_rate": 9.34911938616826e-07, "loss": 0.6732, "step": 1277 }, { "epoch": 0.20619554695062922, "grad_norm": 1.610758066775584, "learning_rate": 9.347802578665198e-07, "loss": 0.5833, "step": 1278 }, { "epoch": 0.2063568893191352, "grad_norm": 1.5370489590900385, "learning_rate": 9.346484533409252e-07, "loss": 0.4917, "step": 1279 }, { "epoch": 0.20651823168764116, "grad_norm": 1.7059003283015943, "learning_rate": 9.345165250775642e-07, "loss": 0.5117, "step": 1280 }, { "epoch": 0.20667957405614715, "grad_norm": 1.723955213960743, "learning_rate": 9.343844731139954e-07, "loss": 0.443, "step": 1281 }, { "epoch": 0.2068409164246531, "grad_norm": 1.6327856326288337, "learning_rate": 9.342522974878115e-07, "loss": 0.5688, "step": 1282 }, { "epoch": 0.2070022587931591, "grad_norm": 1.8746897758702807, "learning_rate": 9.341199982366412e-07, "loss": 0.5735, "step": 1283 }, { "epoch": 0.20716360116166505, "grad_norm": 1.6148811147765834, "learning_rate": 9.339875753981478e-07, "loss": 0.6131, "step": 1284 }, { "epoch": 0.20732494353017103, "grad_norm": 1.5175741973786852, "learning_rate": 9.338550290100305e-07, "loss": 0.3887, "step": 1285 }, { "epoch": 0.207486285898677, "grad_norm": 1.7726299447015614, "learning_rate": 9.337223591100228e-07, "loss": 0.6556, "step": 1286 }, { "epoch": 0.20764762826718297, "grad_norm": 1.5259675755486997, "learning_rate": 9.335895657358936e-07, "loss": 0.4167, "step": 1287 }, { "epoch": 0.20780897063568893, "grad_norm": 1.7730069078847674, "learning_rate": 9.334566489254479e-07, "loss": 0.5908, "step": 1288 }, { "epoch": 0.2079703130041949, "grad_norm": 1.6290471224166085, "learning_rate": 9.333236087165243e-07, "loss": 0.6181, "step": 1289 }, { "epoch": 0.20813165537270087, "grad_norm": 1.7385004141156744, "learning_rate": 9.331904451469981e-07, "loss": 0.5387, "step": 1290 }, { "epoch": 0.20829299774120685, "grad_norm": 2.0627633129000493, "learning_rate": 9.330571582547781e-07, "loss": 0.7677, "step": 1291 }, { "epoch": 0.2084543401097128, "grad_norm": 1.7949145693625128, "learning_rate": 9.329237480778097e-07, "loss": 0.3915, "step": 1292 }, { "epoch": 0.20861568247821877, "grad_norm": 2.075233225756609, "learning_rate": 9.327902146540723e-07, "loss": 0.5416, "step": 1293 }, { "epoch": 0.20877702484672475, "grad_norm": 1.3568414778326545, "learning_rate": 9.326565580215811e-07, "loss": 0.3572, "step": 1294 }, { "epoch": 0.2089383672152307, "grad_norm": 1.4170811084317458, "learning_rate": 9.32522778218386e-07, "loss": 0.4108, "step": 1295 }, { "epoch": 0.2090997095837367, "grad_norm": 1.765255779719889, "learning_rate": 9.323888752825719e-07, "loss": 0.6248, "step": 1296 }, { "epoch": 0.20926105195224265, "grad_norm": 1.5776959581979926, "learning_rate": 9.322548492522593e-07, "loss": 0.3919, "step": 1297 }, { "epoch": 0.20942239432074863, "grad_norm": 1.9718359724193784, "learning_rate": 9.321207001656029e-07, "loss": 0.5373, "step": 1298 }, { "epoch": 0.2095837366892546, "grad_norm": 2.079266686285757, "learning_rate": 9.319864280607934e-07, "loss": 0.5127, "step": 1299 }, { "epoch": 0.20974507905776058, "grad_norm": 1.6811719855037228, "learning_rate": 9.318520329760555e-07, "loss": 0.5991, "step": 1300 }, { "epoch": 0.20990642142626653, "grad_norm": 1.7874614097857857, "learning_rate": 9.317175149496497e-07, "loss": 0.6062, "step": 1301 }, { "epoch": 0.21006776379477252, "grad_norm": 1.4542905173622533, "learning_rate": 9.315828740198713e-07, "loss": 0.5982, "step": 1302 }, { "epoch": 0.21022910616327847, "grad_norm": 1.8239376006662804, "learning_rate": 9.314481102250504e-07, "loss": 0.6259, "step": 1303 }, { "epoch": 0.21039044853178446, "grad_norm": 1.738577828127397, "learning_rate": 9.313132236035521e-07, "loss": 0.385, "step": 1304 }, { "epoch": 0.21055179090029041, "grad_norm": 1.6966742043743062, "learning_rate": 9.311782141937767e-07, "loss": 0.6443, "step": 1305 }, { "epoch": 0.2107131332687964, "grad_norm": 1.7631332233917363, "learning_rate": 9.310430820341593e-07, "loss": 0.7532, "step": 1306 }, { "epoch": 0.21087447563730236, "grad_norm": 1.77267560683501, "learning_rate": 9.309078271631699e-07, "loss": 0.5762, "step": 1307 }, { "epoch": 0.2110358180058083, "grad_norm": 1.4431951736564035, "learning_rate": 9.307724496193135e-07, "loss": 0.5801, "step": 1308 }, { "epoch": 0.2111971603743143, "grad_norm": 2.5824871728362506, "learning_rate": 9.3063694944113e-07, "loss": 0.5853, "step": 1309 }, { "epoch": 0.21135850274282025, "grad_norm": 1.7273302399492165, "learning_rate": 9.305013266671944e-07, "loss": 0.666, "step": 1310 }, { "epoch": 0.21151984511132624, "grad_norm": 1.4761722593970705, "learning_rate": 9.303655813361162e-07, "loss": 0.5377, "step": 1311 }, { "epoch": 0.2116811874798322, "grad_norm": 1.3598631059245847, "learning_rate": 9.3022971348654e-07, "loss": 0.6109, "step": 1312 }, { "epoch": 0.21184252984833818, "grad_norm": 1.7861791972816266, "learning_rate": 9.300937231571457e-07, "loss": 0.5651, "step": 1313 }, { "epoch": 0.21200387221684414, "grad_norm": 2.1095580975221537, "learning_rate": 9.299576103866471e-07, "loss": 0.6687, "step": 1314 }, { "epoch": 0.21216521458535012, "grad_norm": 1.8947610931977732, "learning_rate": 9.298213752137938e-07, "loss": 0.5728, "step": 1315 }, { "epoch": 0.21232655695385608, "grad_norm": 1.3744402092716184, "learning_rate": 9.296850176773699e-07, "loss": 0.5298, "step": 1316 }, { "epoch": 0.21248789932236206, "grad_norm": 2.1156783126807825, "learning_rate": 9.295485378161939e-07, "loss": 0.4636, "step": 1317 }, { "epoch": 0.21264924169086802, "grad_norm": 2.051124180777035, "learning_rate": 9.294119356691199e-07, "loss": 0.93, "step": 1318 }, { "epoch": 0.212810584059374, "grad_norm": 1.8152087129962964, "learning_rate": 9.292752112750364e-07, "loss": 0.4505, "step": 1319 }, { "epoch": 0.21297192642787996, "grad_norm": 1.177724961580073, "learning_rate": 9.291383646728667e-07, "loss": 0.4608, "step": 1320 }, { "epoch": 0.21313326879638594, "grad_norm": 1.3878449500932255, "learning_rate": 9.290013959015691e-07, "loss": 0.5068, "step": 1321 }, { "epoch": 0.2132946111648919, "grad_norm": 1.1080819774695032, "learning_rate": 9.28864305000136e-07, "loss": 0.418, "step": 1322 }, { "epoch": 0.21345595353339786, "grad_norm": 1.5129730777349628, "learning_rate": 9.287270920075956e-07, "loss": 0.3567, "step": 1323 }, { "epoch": 0.21361729590190384, "grad_norm": 1.7123311258848484, "learning_rate": 9.285897569630101e-07, "loss": 0.5803, "step": 1324 }, { "epoch": 0.2137786382704098, "grad_norm": 1.945544860934387, "learning_rate": 9.284522999054767e-07, "loss": 0.5686, "step": 1325 }, { "epoch": 0.21393998063891578, "grad_norm": 1.9886124790293103, "learning_rate": 9.283147208741275e-07, "loss": 0.5806, "step": 1326 }, { "epoch": 0.21410132300742174, "grad_norm": 1.6718424143244932, "learning_rate": 9.281770199081289e-07, "loss": 0.7209, "step": 1327 }, { "epoch": 0.21426266537592772, "grad_norm": 1.766923545059863, "learning_rate": 9.280391970466826e-07, "loss": 0.6397, "step": 1328 }, { "epoch": 0.21442400774443368, "grad_norm": 1.9988223423370037, "learning_rate": 9.279012523290242e-07, "loss": 0.8671, "step": 1329 }, { "epoch": 0.21458535011293967, "grad_norm": 1.8388684840509237, "learning_rate": 9.277631857944246e-07, "loss": 0.529, "step": 1330 }, { "epoch": 0.21474669248144562, "grad_norm": 1.8574160346130906, "learning_rate": 9.276249974821895e-07, "loss": 0.5995, "step": 1331 }, { "epoch": 0.2149080348499516, "grad_norm": 1.6521021889584284, "learning_rate": 9.274866874316588e-07, "loss": 0.6257, "step": 1332 }, { "epoch": 0.21506937721845756, "grad_norm": 1.9019614585557572, "learning_rate": 9.273482556822072e-07, "loss": 0.654, "step": 1333 }, { "epoch": 0.21523071958696355, "grad_norm": 1.6640368535747432, "learning_rate": 9.272097022732443e-07, "loss": 0.739, "step": 1334 }, { "epoch": 0.2153920619554695, "grad_norm": 1.8986786740174282, "learning_rate": 9.270710272442139e-07, "loss": 0.7167, "step": 1335 }, { "epoch": 0.2155534043239755, "grad_norm": 1.4272253796499754, "learning_rate": 9.269322306345949e-07, "loss": 0.5561, "step": 1336 }, { "epoch": 0.21571474669248145, "grad_norm": 1.9857994433782302, "learning_rate": 9.267933124839002e-07, "loss": 0.7557, "step": 1337 }, { "epoch": 0.2158760890609874, "grad_norm": 2.0256335739053157, "learning_rate": 9.266542728316779e-07, "loss": 0.5314, "step": 1338 }, { "epoch": 0.2160374314294934, "grad_norm": 1.8190171376574973, "learning_rate": 9.265151117175107e-07, "loss": 0.6348, "step": 1339 }, { "epoch": 0.21619877379799934, "grad_norm": 1.615058418847152, "learning_rate": 9.263758291810153e-07, "loss": 0.4966, "step": 1340 }, { "epoch": 0.21636011616650533, "grad_norm": 1.7999804654650862, "learning_rate": 9.262364252618434e-07, "loss": 0.5001, "step": 1341 }, { "epoch": 0.21652145853501129, "grad_norm": 1.475281497796844, "learning_rate": 9.260968999996812e-07, "loss": 0.538, "step": 1342 }, { "epoch": 0.21668280090351727, "grad_norm": 2.5811949052080267, "learning_rate": 9.259572534342495e-07, "loss": 0.6357, "step": 1343 }, { "epoch": 0.21684414327202323, "grad_norm": 2.0107654275611795, "learning_rate": 9.258174856053034e-07, "loss": 0.3526, "step": 1344 }, { "epoch": 0.2170054856405292, "grad_norm": 1.4224776782813642, "learning_rate": 9.256775965526326e-07, "loss": 0.5624, "step": 1345 }, { "epoch": 0.21716682800903517, "grad_norm": 1.8402932300358736, "learning_rate": 9.255375863160615e-07, "loss": 0.5286, "step": 1346 }, { "epoch": 0.21732817037754115, "grad_norm": 1.2440792050613927, "learning_rate": 9.253974549354488e-07, "loss": 0.48, "step": 1347 }, { "epoch": 0.2174895127460471, "grad_norm": 1.7268292665448715, "learning_rate": 9.252572024506878e-07, "loss": 0.5766, "step": 1348 }, { "epoch": 0.2176508551145531, "grad_norm": 1.4852574987679694, "learning_rate": 9.251168289017064e-07, "loss": 0.5512, "step": 1349 }, { "epoch": 0.21781219748305905, "grad_norm": 1.5267369462512872, "learning_rate": 9.249763343284664e-07, "loss": 0.3611, "step": 1350 }, { "epoch": 0.21797353985156503, "grad_norm": 1.7570479022874184, "learning_rate": 9.248357187709649e-07, "loss": 0.6207, "step": 1351 }, { "epoch": 0.218134882220071, "grad_norm": 1.8257067735766048, "learning_rate": 9.246949822692327e-07, "loss": 0.5935, "step": 1352 }, { "epoch": 0.21829622458857695, "grad_norm": 1.7386606552457933, "learning_rate": 9.245541248633355e-07, "loss": 0.5274, "step": 1353 }, { "epoch": 0.21845756695708293, "grad_norm": 1.616899695102772, "learning_rate": 9.244131465933731e-07, "loss": 0.5348, "step": 1354 }, { "epoch": 0.2186189093255889, "grad_norm": 2.4150734176691055, "learning_rate": 9.2427204749948e-07, "loss": 0.5696, "step": 1355 }, { "epoch": 0.21878025169409487, "grad_norm": 1.541089112979612, "learning_rate": 9.241308276218247e-07, "loss": 0.5778, "step": 1356 }, { "epoch": 0.21894159406260083, "grad_norm": 1.4413133710178128, "learning_rate": 9.239894870006108e-07, "loss": 0.4496, "step": 1357 }, { "epoch": 0.21910293643110682, "grad_norm": 1.2439724076360286, "learning_rate": 9.238480256760753e-07, "loss": 0.4815, "step": 1358 }, { "epoch": 0.21926427879961277, "grad_norm": 1.430688726323241, "learning_rate": 9.237064436884905e-07, "loss": 0.47, "step": 1359 }, { "epoch": 0.21942562116811876, "grad_norm": 2.0161702209695282, "learning_rate": 9.235647410781623e-07, "loss": 0.6141, "step": 1360 }, { "epoch": 0.2195869635366247, "grad_norm": 1.8240378573989657, "learning_rate": 9.234229178854314e-07, "loss": 0.6002, "step": 1361 }, { "epoch": 0.2197483059051307, "grad_norm": 1.6533019261427802, "learning_rate": 9.232809741506729e-07, "loss": 0.5046, "step": 1362 }, { "epoch": 0.21990964827363665, "grad_norm": 1.7003600748982144, "learning_rate": 9.231389099142955e-07, "loss": 0.5504, "step": 1363 }, { "epoch": 0.22007099064214264, "grad_norm": 1.905747519383273, "learning_rate": 9.229967252167432e-07, "loss": 0.6665, "step": 1364 }, { "epoch": 0.2202323330106486, "grad_norm": 2.205503978441136, "learning_rate": 9.228544200984935e-07, "loss": 0.6271, "step": 1365 }, { "epoch": 0.22039367537915455, "grad_norm": 1.3985876077400543, "learning_rate": 9.227119946000589e-07, "loss": 0.4938, "step": 1366 }, { "epoch": 0.22055501774766054, "grad_norm": 1.4881844877091037, "learning_rate": 9.225694487619853e-07, "loss": 0.4524, "step": 1367 }, { "epoch": 0.2207163601161665, "grad_norm": 1.4198446344620745, "learning_rate": 9.224267826248536e-07, "loss": 0.6404, "step": 1368 }, { "epoch": 0.22087770248467248, "grad_norm": 1.6271370260616684, "learning_rate": 9.222839962292783e-07, "loss": 0.6556, "step": 1369 }, { "epoch": 0.22103904485317843, "grad_norm": 2.2072601469728923, "learning_rate": 9.22141089615909e-07, "loss": 0.6857, "step": 1370 }, { "epoch": 0.22120038722168442, "grad_norm": 1.9257795786511827, "learning_rate": 9.219980628254287e-07, "loss": 0.674, "step": 1371 }, { "epoch": 0.22136172959019038, "grad_norm": 1.5373701676546248, "learning_rate": 9.218549158985552e-07, "loss": 0.5472, "step": 1372 }, { "epoch": 0.22152307195869636, "grad_norm": 3.0631493639020824, "learning_rate": 9.217116488760399e-07, "loss": 0.6584, "step": 1373 }, { "epoch": 0.22168441432720232, "grad_norm": 1.6912748713049623, "learning_rate": 9.21568261798669e-07, "loss": 0.5677, "step": 1374 }, { "epoch": 0.2218457566957083, "grad_norm": 1.4565752149254874, "learning_rate": 9.214247547072624e-07, "loss": 0.4833, "step": 1375 }, { "epoch": 0.22200709906421426, "grad_norm": 1.4471335820594935, "learning_rate": 9.212811276426746e-07, "loss": 0.6, "step": 1376 }, { "epoch": 0.22216844143272024, "grad_norm": 1.3995354886558837, "learning_rate": 9.211373806457939e-07, "loss": 0.4694, "step": 1377 }, { "epoch": 0.2223297838012262, "grad_norm": 2.2864966479822644, "learning_rate": 9.209935137575429e-07, "loss": 0.762, "step": 1378 }, { "epoch": 0.22249112616973218, "grad_norm": 1.3197683081884426, "learning_rate": 9.208495270188783e-07, "loss": 0.4469, "step": 1379 }, { "epoch": 0.22265246853823814, "grad_norm": 1.8480317780582163, "learning_rate": 9.207054204707909e-07, "loss": 0.6326, "step": 1380 }, { "epoch": 0.2228138109067441, "grad_norm": 1.786461818087398, "learning_rate": 9.205611941543057e-07, "loss": 0.6398, "step": 1381 }, { "epoch": 0.22297515327525008, "grad_norm": 1.8808471425768825, "learning_rate": 9.204168481104815e-07, "loss": 0.6849, "step": 1382 }, { "epoch": 0.22313649564375604, "grad_norm": 1.9116177886505592, "learning_rate": 9.202723823804117e-07, "loss": 0.686, "step": 1383 }, { "epoch": 0.22329783801226202, "grad_norm": 2.917867104676249, "learning_rate": 9.201277970052233e-07, "loss": 0.616, "step": 1384 }, { "epoch": 0.22345918038076798, "grad_norm": 1.6838142976328963, "learning_rate": 9.199830920260776e-07, "loss": 0.4446, "step": 1385 }, { "epoch": 0.22362052274927396, "grad_norm": 1.5262405821363605, "learning_rate": 9.1983826748417e-07, "loss": 0.5959, "step": 1386 }, { "epoch": 0.22378186511777992, "grad_norm": 1.733696744798116, "learning_rate": 9.196933234207296e-07, "loss": 0.6721, "step": 1387 }, { "epoch": 0.2239432074862859, "grad_norm": 1.475627946539671, "learning_rate": 9.1954825987702e-07, "loss": 0.6745, "step": 1388 }, { "epoch": 0.22410454985479186, "grad_norm": 1.2067117113764865, "learning_rate": 9.194030768943383e-07, "loss": 0.5373, "step": 1389 }, { "epoch": 0.22426589222329785, "grad_norm": 1.7881310716117278, "learning_rate": 9.192577745140163e-07, "loss": 0.4443, "step": 1390 }, { "epoch": 0.2244272345918038, "grad_norm": 2.3179916951554884, "learning_rate": 9.191123527774189e-07, "loss": 0.8451, "step": 1391 }, { "epoch": 0.2245885769603098, "grad_norm": 1.5219625522252433, "learning_rate": 9.189668117259458e-07, "loss": 0.5257, "step": 1392 }, { "epoch": 0.22474991932881574, "grad_norm": 2.0603569919544658, "learning_rate": 9.1882115140103e-07, "loss": 0.6727, "step": 1393 }, { "epoch": 0.22491126169732173, "grad_norm": 1.8775173454651712, "learning_rate": 9.186753718441392e-07, "loss": 0.5518, "step": 1394 }, { "epoch": 0.22507260406582769, "grad_norm": 1.7199180795323143, "learning_rate": 9.18529473096774e-07, "loss": 0.6191, "step": 1395 }, { "epoch": 0.22523394643433364, "grad_norm": 1.8186167402704572, "learning_rate": 9.1838345520047e-07, "loss": 0.6236, "step": 1396 }, { "epoch": 0.22539528880283963, "grad_norm": 1.646602820936048, "learning_rate": 9.182373181967961e-07, "loss": 0.5367, "step": 1397 }, { "epoch": 0.22555663117134558, "grad_norm": 1.5221652465020648, "learning_rate": 9.180910621273555e-07, "loss": 0.5196, "step": 1398 }, { "epoch": 0.22571797353985157, "grad_norm": 2.608852128505133, "learning_rate": 9.179446870337846e-07, "loss": 0.5312, "step": 1399 }, { "epoch": 0.22587931590835753, "grad_norm": 1.6884681608786418, "learning_rate": 9.177981929577545e-07, "loss": 0.3964, "step": 1400 }, { "epoch": 0.2260406582768635, "grad_norm": 1.4536043422476659, "learning_rate": 9.176515799409699e-07, "loss": 0.627, "step": 1401 }, { "epoch": 0.22620200064536947, "grad_norm": 1.7927337691966472, "learning_rate": 9.175048480251691e-07, "loss": 0.533, "step": 1402 }, { "epoch": 0.22636334301387545, "grad_norm": 1.8032546527336595, "learning_rate": 9.173579972521242e-07, "loss": 0.4546, "step": 1403 }, { "epoch": 0.2265246853823814, "grad_norm": 1.573184750513056, "learning_rate": 9.172110276636418e-07, "loss": 0.5427, "step": 1404 }, { "epoch": 0.2266860277508874, "grad_norm": 1.7064237932278477, "learning_rate": 9.170639393015618e-07, "loss": 0.6328, "step": 1405 }, { "epoch": 0.22684737011939335, "grad_norm": 1.6709411722645484, "learning_rate": 9.169167322077578e-07, "loss": 0.5174, "step": 1406 }, { "epoch": 0.22700871248789933, "grad_norm": 2.1071563355428125, "learning_rate": 9.167694064241376e-07, "loss": 0.505, "step": 1407 }, { "epoch": 0.2271700548564053, "grad_norm": 1.7166483427507997, "learning_rate": 9.166219619926425e-07, "loss": 0.4325, "step": 1408 }, { "epoch": 0.22733139722491127, "grad_norm": 2.054582252272651, "learning_rate": 9.164743989552478e-07, "loss": 0.706, "step": 1409 }, { "epoch": 0.22749273959341723, "grad_norm": 1.751239405837542, "learning_rate": 9.163267173539623e-07, "loss": 0.591, "step": 1410 }, { "epoch": 0.2276540819619232, "grad_norm": 2.2216276393285104, "learning_rate": 9.161789172308287e-07, "loss": 0.622, "step": 1411 }, { "epoch": 0.22781542433042917, "grad_norm": 1.6820462720087663, "learning_rate": 9.160309986279235e-07, "loss": 0.6002, "step": 1412 }, { "epoch": 0.22797676669893513, "grad_norm": 1.6684756792883464, "learning_rate": 9.158829615873569e-07, "loss": 0.5764, "step": 1413 }, { "epoch": 0.2281381090674411, "grad_norm": 1.5465617007093355, "learning_rate": 9.157348061512726e-07, "loss": 0.6275, "step": 1414 }, { "epoch": 0.22829945143594707, "grad_norm": 1.7892675157376958, "learning_rate": 9.155865323618484e-07, "loss": 0.4291, "step": 1415 }, { "epoch": 0.22846079380445306, "grad_norm": 1.8308925058383905, "learning_rate": 9.154381402612954e-07, "loss": 0.5633, "step": 1416 }, { "epoch": 0.228622136172959, "grad_norm": 1.338381088461031, "learning_rate": 9.152896298918587e-07, "loss": 0.5731, "step": 1417 }, { "epoch": 0.228783478541465, "grad_norm": 1.6721488514445504, "learning_rate": 9.151410012958168e-07, "loss": 0.3574, "step": 1418 }, { "epoch": 0.22894482090997095, "grad_norm": 1.906967715869754, "learning_rate": 9.149922545154822e-07, "loss": 0.5279, "step": 1419 }, { "epoch": 0.22910616327847694, "grad_norm": 1.3921423359918657, "learning_rate": 9.148433895932006e-07, "loss": 0.4929, "step": 1420 }, { "epoch": 0.2292675056469829, "grad_norm": 1.6073471181735393, "learning_rate": 9.146944065713518e-07, "loss": 0.5256, "step": 1421 }, { "epoch": 0.22942884801548888, "grad_norm": 1.975087456034384, "learning_rate": 9.145453054923486e-07, "loss": 0.7197, "step": 1422 }, { "epoch": 0.22959019038399484, "grad_norm": 1.512067493360309, "learning_rate": 9.143960863986383e-07, "loss": 0.5273, "step": 1423 }, { "epoch": 0.22975153275250082, "grad_norm": 1.6157609480248336, "learning_rate": 9.142467493327008e-07, "loss": 0.4996, "step": 1424 }, { "epoch": 0.22991287512100678, "grad_norm": 1.3352404555033361, "learning_rate": 9.140972943370506e-07, "loss": 0.4213, "step": 1425 }, { "epoch": 0.23007421748951273, "grad_norm": 2.0884731468573023, "learning_rate": 9.139477214542348e-07, "loss": 0.4309, "step": 1426 }, { "epoch": 0.23023555985801872, "grad_norm": 1.8539088709346927, "learning_rate": 9.137980307268347e-07, "loss": 0.3993, "step": 1427 }, { "epoch": 0.23039690222652467, "grad_norm": 1.646364906836763, "learning_rate": 9.13648222197465e-07, "loss": 0.568, "step": 1428 }, { "epoch": 0.23055824459503066, "grad_norm": 1.4364548076155752, "learning_rate": 9.134982959087738e-07, "loss": 0.606, "step": 1429 }, { "epoch": 0.23071958696353662, "grad_norm": 1.552984867643677, "learning_rate": 9.133482519034428e-07, "loss": 0.5172, "step": 1430 }, { "epoch": 0.2308809293320426, "grad_norm": 1.7816659542090643, "learning_rate": 9.131980902241873e-07, "loss": 0.5884, "step": 1431 }, { "epoch": 0.23104227170054856, "grad_norm": 1.217717491400452, "learning_rate": 9.130478109137562e-07, "loss": 0.4752, "step": 1432 }, { "epoch": 0.23120361406905454, "grad_norm": 1.5291086911775809, "learning_rate": 9.128974140149315e-07, "loss": 0.6026, "step": 1433 }, { "epoch": 0.2313649564375605, "grad_norm": 1.545554242641264, "learning_rate": 9.127468995705287e-07, "loss": 0.3788, "step": 1434 }, { "epoch": 0.23152629880606648, "grad_norm": 1.4521658408296418, "learning_rate": 9.125962676233974e-07, "loss": 0.4819, "step": 1435 }, { "epoch": 0.23168764117457244, "grad_norm": 1.8215382259817576, "learning_rate": 9.1244551821642e-07, "loss": 0.6637, "step": 1436 }, { "epoch": 0.23184898354307842, "grad_norm": 2.032854356268857, "learning_rate": 9.122946513925126e-07, "loss": 0.6957, "step": 1437 }, { "epoch": 0.23201032591158438, "grad_norm": 2.18621417810702, "learning_rate": 9.121436671946246e-07, "loss": 0.4712, "step": 1438 }, { "epoch": 0.23217166828009037, "grad_norm": 1.7803726712692192, "learning_rate": 9.11992565665739e-07, "loss": 0.6674, "step": 1439 }, { "epoch": 0.23233301064859632, "grad_norm": 1.6604650591695023, "learning_rate": 9.118413468488719e-07, "loss": 0.4135, "step": 1440 }, { "epoch": 0.23249435301710228, "grad_norm": 1.2990183296613653, "learning_rate": 9.116900107870731e-07, "loss": 0.3316, "step": 1441 }, { "epoch": 0.23265569538560826, "grad_norm": 1.2850024470443284, "learning_rate": 9.115385575234258e-07, "loss": 0.4994, "step": 1442 }, { "epoch": 0.23281703775411422, "grad_norm": 2.2884883225592905, "learning_rate": 9.113869871010463e-07, "loss": 0.5444, "step": 1443 }, { "epoch": 0.2329783801226202, "grad_norm": 2.157401067912123, "learning_rate": 9.112352995630843e-07, "loss": 0.6108, "step": 1444 }, { "epoch": 0.23313972249112616, "grad_norm": 3.4865107127869863, "learning_rate": 9.110834949527231e-07, "loss": 0.633, "step": 1445 }, { "epoch": 0.23330106485963215, "grad_norm": 1.3375977132880363, "learning_rate": 9.10931573313179e-07, "loss": 0.4907, "step": 1446 }, { "epoch": 0.2334624072281381, "grad_norm": 1.4601417046848604, "learning_rate": 9.107795346877018e-07, "loss": 0.4636, "step": 1447 }, { "epoch": 0.2336237495966441, "grad_norm": 1.2199503049734934, "learning_rate": 9.106273791195747e-07, "loss": 0.5392, "step": 1448 }, { "epoch": 0.23378509196515004, "grad_norm": 2.3582211129635304, "learning_rate": 9.104751066521136e-07, "loss": 0.6305, "step": 1449 }, { "epoch": 0.23394643433365603, "grad_norm": 1.9199846476696076, "learning_rate": 9.103227173286689e-07, "loss": 0.5985, "step": 1450 }, { "epoch": 0.23410777670216198, "grad_norm": 1.6956943486251732, "learning_rate": 9.10170211192623e-07, "loss": 0.4741, "step": 1451 }, { "epoch": 0.23426911907066797, "grad_norm": 1.331089794566366, "learning_rate": 9.100175882873922e-07, "loss": 0.5877, "step": 1452 }, { "epoch": 0.23443046143917393, "grad_norm": 1.7344860694306046, "learning_rate": 9.098648486564258e-07, "loss": 0.644, "step": 1453 }, { "epoch": 0.2345918038076799, "grad_norm": 1.9544310208627649, "learning_rate": 9.097119923432065e-07, "loss": 0.4959, "step": 1454 }, { "epoch": 0.23475314617618587, "grad_norm": 1.480752560429289, "learning_rate": 9.095590193912502e-07, "loss": 0.5766, "step": 1455 }, { "epoch": 0.23491448854469182, "grad_norm": 1.6297427437801129, "learning_rate": 9.09405929844106e-07, "loss": 0.588, "step": 1456 }, { "epoch": 0.2350758309131978, "grad_norm": 1.7124326957229534, "learning_rate": 9.09252723745356e-07, "loss": 0.4494, "step": 1457 }, { "epoch": 0.23523717328170377, "grad_norm": 2.482302394965954, "learning_rate": 9.090994011386157e-07, "loss": 0.6879, "step": 1458 }, { "epoch": 0.23539851565020975, "grad_norm": 2.086026307690815, "learning_rate": 9.089459620675337e-07, "loss": 0.7018, "step": 1459 }, { "epoch": 0.2355598580187157, "grad_norm": 1.4918355476778624, "learning_rate": 9.087924065757918e-07, "loss": 0.6277, "step": 1460 }, { "epoch": 0.2357212003872217, "grad_norm": 2.0082345956719454, "learning_rate": 9.08638734707105e-07, "loss": 0.6622, "step": 1461 }, { "epoch": 0.23588254275572765, "grad_norm": 1.699949821965795, "learning_rate": 9.084849465052209e-07, "loss": 0.5501, "step": 1462 }, { "epoch": 0.23604388512423363, "grad_norm": 1.892850779448144, "learning_rate": 9.083310420139212e-07, "loss": 0.5418, "step": 1463 }, { "epoch": 0.2362052274927396, "grad_norm": 1.6965000194905957, "learning_rate": 9.081770212770199e-07, "loss": 0.6613, "step": 1464 }, { "epoch": 0.23636656986124557, "grad_norm": 1.2486187455977966, "learning_rate": 9.080228843383642e-07, "loss": 0.4699, "step": 1465 }, { "epoch": 0.23652791222975153, "grad_norm": 1.6184637958152996, "learning_rate": 9.078686312418348e-07, "loss": 0.4353, "step": 1466 }, { "epoch": 0.23668925459825751, "grad_norm": 2.282974949669639, "learning_rate": 9.077142620313451e-07, "loss": 0.4586, "step": 1467 }, { "epoch": 0.23685059696676347, "grad_norm": 1.7260528088311498, "learning_rate": 9.075597767508415e-07, "loss": 0.5719, "step": 1468 }, { "epoch": 0.23701193933526943, "grad_norm": 1.6894109996328726, "learning_rate": 9.074051754443037e-07, "loss": 0.5964, "step": 1469 }, { "epoch": 0.2371732817037754, "grad_norm": 1.4334293447910778, "learning_rate": 9.072504581557445e-07, "loss": 0.4725, "step": 1470 }, { "epoch": 0.23733462407228137, "grad_norm": 1.5314624405457575, "learning_rate": 9.070956249292094e-07, "loss": 0.716, "step": 1471 }, { "epoch": 0.23749596644078735, "grad_norm": 1.8480723519384374, "learning_rate": 9.06940675808777e-07, "loss": 0.4926, "step": 1472 }, { "epoch": 0.2376573088092933, "grad_norm": 1.5338628625110526, "learning_rate": 9.06785610838559e-07, "loss": 0.4424, "step": 1473 }, { "epoch": 0.2378186511777993, "grad_norm": 1.45455288207802, "learning_rate": 9.066304300627e-07, "loss": 0.5632, "step": 1474 }, { "epoch": 0.23797999354630525, "grad_norm": 1.5014110127545293, "learning_rate": 9.064751335253776e-07, "loss": 0.5352, "step": 1475 }, { "epoch": 0.23814133591481124, "grad_norm": 1.527730751277144, "learning_rate": 9.063197212708024e-07, "loss": 0.5366, "step": 1476 }, { "epoch": 0.2383026782833172, "grad_norm": 1.693202450354822, "learning_rate": 9.061641933432178e-07, "loss": 0.5936, "step": 1477 }, { "epoch": 0.23846402065182318, "grad_norm": 1.816158745423227, "learning_rate": 9.060085497869004e-07, "loss": 0.6018, "step": 1478 }, { "epoch": 0.23862536302032913, "grad_norm": 1.9955210003991584, "learning_rate": 9.058527906461592e-07, "loss": 0.6715, "step": 1479 }, { "epoch": 0.23878670538883512, "grad_norm": 1.5813855018909273, "learning_rate": 9.05696915965337e-07, "loss": 0.6183, "step": 1480 }, { "epoch": 0.23894804775734108, "grad_norm": 1.7921701950927291, "learning_rate": 9.055409257888083e-07, "loss": 0.5545, "step": 1481 }, { "epoch": 0.23910939012584706, "grad_norm": 1.449916784595232, "learning_rate": 9.053848201609815e-07, "loss": 0.6056, "step": 1482 }, { "epoch": 0.23927073249435302, "grad_norm": 1.9946110363910352, "learning_rate": 9.052285991262974e-07, "loss": 0.5972, "step": 1483 }, { "epoch": 0.23943207486285897, "grad_norm": 1.9561364722901817, "learning_rate": 9.050722627292297e-07, "loss": 0.5324, "step": 1484 }, { "epoch": 0.23959341723136496, "grad_norm": 1.8656032976329116, "learning_rate": 9.049158110142851e-07, "loss": 0.5906, "step": 1485 }, { "epoch": 0.23975475959987091, "grad_norm": 1.7651465113620133, "learning_rate": 9.047592440260028e-07, "loss": 0.4455, "step": 1486 }, { "epoch": 0.2399161019683769, "grad_norm": 1.5978555345176995, "learning_rate": 9.046025618089552e-07, "loss": 0.447, "step": 1487 }, { "epoch": 0.24007744433688286, "grad_norm": 2.185507602696754, "learning_rate": 9.044457644077474e-07, "loss": 0.7263, "step": 1488 }, { "epoch": 0.24023878670538884, "grad_norm": 1.7985894689972115, "learning_rate": 9.04288851867017e-07, "loss": 0.4806, "step": 1489 }, { "epoch": 0.2404001290738948, "grad_norm": 2.0391750962312187, "learning_rate": 9.041318242314348e-07, "loss": 0.4685, "step": 1490 }, { "epoch": 0.24056147144240078, "grad_norm": 1.593122396126524, "learning_rate": 9.039746815457038e-07, "loss": 0.6514, "step": 1491 }, { "epoch": 0.24072281381090674, "grad_norm": 1.6498858701363885, "learning_rate": 9.038174238545608e-07, "loss": 0.576, "step": 1492 }, { "epoch": 0.24088415617941272, "grad_norm": 1.5154974972293014, "learning_rate": 9.03660051202774e-07, "loss": 0.5954, "step": 1493 }, { "epoch": 0.24104549854791868, "grad_norm": 1.9721819316825904, "learning_rate": 9.035025636351452e-07, "loss": 0.5952, "step": 1494 }, { "epoch": 0.24120684091642466, "grad_norm": 2.1767459833124074, "learning_rate": 9.033449611965089e-07, "loss": 0.6494, "step": 1495 }, { "epoch": 0.24136818328493062, "grad_norm": 1.4687559249433768, "learning_rate": 9.031872439317319e-07, "loss": 0.603, "step": 1496 }, { "epoch": 0.2415295256534366, "grad_norm": 1.8608916692591264, "learning_rate": 9.030294118857138e-07, "loss": 0.6614, "step": 1497 }, { "epoch": 0.24169086802194256, "grad_norm": 1.6609306414257132, "learning_rate": 9.028714651033873e-07, "loss": 0.5178, "step": 1498 }, { "epoch": 0.24185221039044852, "grad_norm": 2.0971707540544657, "learning_rate": 9.027134036297171e-07, "loss": 0.4997, "step": 1499 }, { "epoch": 0.2420135527589545, "grad_norm": 1.787446470726234, "learning_rate": 9.025552275097011e-07, "loss": 0.5178, "step": 1500 }, { "epoch": 0.24217489512746046, "grad_norm": 2.0958883269459565, "learning_rate": 9.023969367883695e-07, "loss": 0.6194, "step": 1501 }, { "epoch": 0.24233623749596644, "grad_norm": 2.066439363326073, "learning_rate": 9.022385315107852e-07, "loss": 0.6758, "step": 1502 }, { "epoch": 0.2424975798644724, "grad_norm": 1.8336588252725752, "learning_rate": 9.020800117220439e-07, "loss": 0.5281, "step": 1503 }, { "epoch": 0.24265892223297839, "grad_norm": 1.6264004174937, "learning_rate": 9.019213774672738e-07, "loss": 0.4785, "step": 1504 }, { "epoch": 0.24282026460148434, "grad_norm": 1.3905582197635165, "learning_rate": 9.017626287916356e-07, "loss": 0.4556, "step": 1505 }, { "epoch": 0.24298160696999033, "grad_norm": 1.622627654027976, "learning_rate": 9.016037657403223e-07, "loss": 0.6178, "step": 1506 }, { "epoch": 0.24314294933849628, "grad_norm": 1.9294595931291083, "learning_rate": 9.014447883585603e-07, "loss": 0.6527, "step": 1507 }, { "epoch": 0.24330429170700227, "grad_norm": 2.1703005785576894, "learning_rate": 9.012856966916079e-07, "loss": 0.6997, "step": 1508 }, { "epoch": 0.24346563407550822, "grad_norm": 1.693973205611215, "learning_rate": 9.011264907847556e-07, "loss": 0.3902, "step": 1509 }, { "epoch": 0.2436269764440142, "grad_norm": 2.0354560148040304, "learning_rate": 9.009671706833275e-07, "loss": 0.6948, "step": 1510 }, { "epoch": 0.24378831881252017, "grad_norm": 1.742184284019068, "learning_rate": 9.008077364326792e-07, "loss": 0.5693, "step": 1511 }, { "epoch": 0.24394966118102615, "grad_norm": 1.589181898164427, "learning_rate": 9.006481880781994e-07, "loss": 0.6549, "step": 1512 }, { "epoch": 0.2441110035495321, "grad_norm": 1.517738127650533, "learning_rate": 9.004885256653089e-07, "loss": 0.4087, "step": 1513 }, { "epoch": 0.24427234591803806, "grad_norm": 1.4116907620465982, "learning_rate": 9.003287492394613e-07, "loss": 0.341, "step": 1514 }, { "epoch": 0.24443368828654405, "grad_norm": 1.5143549208622533, "learning_rate": 9.001688588461424e-07, "loss": 0.6551, "step": 1515 }, { "epoch": 0.24459503065505, "grad_norm": 1.6119473833721292, "learning_rate": 9.000088545308706e-07, "loss": 0.5143, "step": 1516 }, { "epoch": 0.244756373023556, "grad_norm": 2.160340892771263, "learning_rate": 8.998487363391967e-07, "loss": 0.5786, "step": 1517 }, { "epoch": 0.24491771539206195, "grad_norm": 1.533251641350214, "learning_rate": 8.99688504316704e-07, "loss": 0.6392, "step": 1518 }, { "epoch": 0.24507905776056793, "grad_norm": 1.3828920244529113, "learning_rate": 8.995281585090077e-07, "loss": 0.4648, "step": 1519 }, { "epoch": 0.2452404001290739, "grad_norm": 1.6939279554285576, "learning_rate": 8.993676989617563e-07, "loss": 0.3898, "step": 1520 }, { "epoch": 0.24540174249757987, "grad_norm": 1.6244049449947775, "learning_rate": 8.992071257206299e-07, "loss": 0.4814, "step": 1521 }, { "epoch": 0.24556308486608583, "grad_norm": 1.5682109225692205, "learning_rate": 8.990464388313414e-07, "loss": 0.6109, "step": 1522 }, { "epoch": 0.2457244272345918, "grad_norm": 1.8348526366930715, "learning_rate": 8.988856383396357e-07, "loss": 0.6301, "step": 1523 }, { "epoch": 0.24588576960309777, "grad_norm": 1.5015216897267414, "learning_rate": 8.987247242912906e-07, "loss": 0.6399, "step": 1524 }, { "epoch": 0.24604711197160375, "grad_norm": 1.8797963149743153, "learning_rate": 8.985636967321153e-07, "loss": 0.4296, "step": 1525 }, { "epoch": 0.2462084543401097, "grad_norm": 1.7635921827795833, "learning_rate": 8.984025557079522e-07, "loss": 0.6905, "step": 1526 }, { "epoch": 0.2463697967086157, "grad_norm": 2.0449435607284965, "learning_rate": 8.982413012646759e-07, "loss": 0.6205, "step": 1527 }, { "epoch": 0.24653113907712165, "grad_norm": 2.0289725814628756, "learning_rate": 8.980799334481927e-07, "loss": 0.6529, "step": 1528 }, { "epoch": 0.2466924814456276, "grad_norm": 1.1555119298123493, "learning_rate": 8.979184523044418e-07, "loss": 0.3196, "step": 1529 }, { "epoch": 0.2468538238141336, "grad_norm": 2.2320918442482753, "learning_rate": 8.977568578793942e-07, "loss": 0.592, "step": 1530 }, { "epoch": 0.24701516618263955, "grad_norm": 2.2177641921730453, "learning_rate": 8.975951502190535e-07, "loss": 0.5488, "step": 1531 }, { "epoch": 0.24717650855114554, "grad_norm": 1.713578367758163, "learning_rate": 8.974333293694555e-07, "loss": 0.4346, "step": 1532 }, { "epoch": 0.2473378509196515, "grad_norm": 1.9235145584400346, "learning_rate": 8.972713953766679e-07, "loss": 0.6719, "step": 1533 }, { "epoch": 0.24749919328815748, "grad_norm": 1.3084791901959474, "learning_rate": 8.971093482867908e-07, "loss": 0.5468, "step": 1534 }, { "epoch": 0.24766053565666343, "grad_norm": 1.6633202415185528, "learning_rate": 8.969471881459569e-07, "loss": 0.5924, "step": 1535 }, { "epoch": 0.24782187802516942, "grad_norm": 2.1979206922834753, "learning_rate": 8.967849150003304e-07, "loss": 0.7622, "step": 1536 }, { "epoch": 0.24798322039367537, "grad_norm": 1.8973113436390818, "learning_rate": 8.96622528896108e-07, "loss": 0.4038, "step": 1537 }, { "epoch": 0.24814456276218136, "grad_norm": 3.5659274294791827, "learning_rate": 8.964600298795186e-07, "loss": 0.6178, "step": 1538 }, { "epoch": 0.24830590513068732, "grad_norm": 1.6577323182298789, "learning_rate": 8.962974179968232e-07, "loss": 0.4757, "step": 1539 }, { "epoch": 0.2484672474991933, "grad_norm": 1.841883927327848, "learning_rate": 8.961346932943148e-07, "loss": 0.4752, "step": 1540 }, { "epoch": 0.24862858986769926, "grad_norm": 1.9030852390388953, "learning_rate": 8.959718558183187e-07, "loss": 0.6568, "step": 1541 }, { "epoch": 0.24878993223620524, "grad_norm": 1.4874485519872478, "learning_rate": 8.958089056151923e-07, "loss": 0.4356, "step": 1542 }, { "epoch": 0.2489512746047112, "grad_norm": 1.945101745709648, "learning_rate": 8.956458427313252e-07, "loss": 0.5885, "step": 1543 }, { "epoch": 0.24911261697321715, "grad_norm": 2.028262126971459, "learning_rate": 8.954826672131383e-07, "loss": 0.6759, "step": 1544 }, { "epoch": 0.24927395934172314, "grad_norm": 4.153751626256899, "learning_rate": 8.953193791070858e-07, "loss": 0.5356, "step": 1545 }, { "epoch": 0.2494353017102291, "grad_norm": 1.6598469524609407, "learning_rate": 8.951559784596532e-07, "loss": 0.5096, "step": 1546 }, { "epoch": 0.24959664407873508, "grad_norm": 2.102680688775396, "learning_rate": 8.949924653173579e-07, "loss": 0.5532, "step": 1547 }, { "epoch": 0.24975798644724104, "grad_norm": 1.992018090319165, "learning_rate": 8.948288397267499e-07, "loss": 0.6632, "step": 1548 }, { "epoch": 0.24991932881574702, "grad_norm": 1.7529273753031407, "learning_rate": 8.946651017344107e-07, "loss": 0.6572, "step": 1549 }, { "epoch": 0.250080671184253, "grad_norm": 2.1481434013903753, "learning_rate": 8.94501251386954e-07, "loss": 0.5394, "step": 1550 }, { "epoch": 0.25024201355275894, "grad_norm": 1.510871196187523, "learning_rate": 8.943372887310257e-07, "loss": 0.5785, "step": 1551 }, { "epoch": 0.25040335592126495, "grad_norm": 1.715843586989093, "learning_rate": 8.941732138133031e-07, "loss": 0.4416, "step": 1552 }, { "epoch": 0.2505646982897709, "grad_norm": 1.952719989745755, "learning_rate": 8.940090266804961e-07, "loss": 0.6114, "step": 1553 }, { "epoch": 0.25072604065827686, "grad_norm": 1.7993238265196385, "learning_rate": 8.938447273793461e-07, "loss": 0.57, "step": 1554 }, { "epoch": 0.2508873830267828, "grad_norm": 1.3504007451100577, "learning_rate": 8.936803159566268e-07, "loss": 0.5113, "step": 1555 }, { "epoch": 0.25104872539528883, "grad_norm": 1.33143441261686, "learning_rate": 8.935157924591436e-07, "loss": 0.4551, "step": 1556 }, { "epoch": 0.2512100677637948, "grad_norm": 1.7381496422484073, "learning_rate": 8.933511569337336e-07, "loss": 0.5007, "step": 1557 }, { "epoch": 0.25137141013230074, "grad_norm": 1.6514064574352043, "learning_rate": 8.931864094272662e-07, "loss": 0.5637, "step": 1558 }, { "epoch": 0.2515327525008067, "grad_norm": 1.5293381881492691, "learning_rate": 8.930215499866425e-07, "loss": 0.3809, "step": 1559 }, { "epoch": 0.25169409486931266, "grad_norm": 1.8237258277844228, "learning_rate": 8.928565786587951e-07, "loss": 0.6834, "step": 1560 }, { "epoch": 0.25185543723781867, "grad_norm": 1.8661510831216241, "learning_rate": 8.926914954906895e-07, "loss": 0.6314, "step": 1561 }, { "epoch": 0.2520167796063246, "grad_norm": 1.7089750976310685, "learning_rate": 8.925263005293217e-07, "loss": 0.3795, "step": 1562 }, { "epoch": 0.2521781219748306, "grad_norm": 1.6264468134507974, "learning_rate": 8.923609938217206e-07, "loss": 0.3912, "step": 1563 }, { "epoch": 0.25233946434333654, "grad_norm": 1.6923938009056136, "learning_rate": 8.921955754149462e-07, "loss": 0.5198, "step": 1564 }, { "epoch": 0.25250080671184255, "grad_norm": 1.5267118039323058, "learning_rate": 8.92030045356091e-07, "loss": 0.4056, "step": 1565 }, { "epoch": 0.2526621490803485, "grad_norm": 1.7967837103168973, "learning_rate": 8.918644036922783e-07, "loss": 0.6242, "step": 1566 }, { "epoch": 0.25282349144885446, "grad_norm": 1.6050326829476333, "learning_rate": 8.916986504706642e-07, "loss": 0.5407, "step": 1567 }, { "epoch": 0.2529848338173604, "grad_norm": 1.566574810595323, "learning_rate": 8.915327857384358e-07, "loss": 0.5301, "step": 1568 }, { "epoch": 0.25314617618586643, "grad_norm": 2.053915364625019, "learning_rate": 8.913668095428126e-07, "loss": 0.6568, "step": 1569 }, { "epoch": 0.2533075185543724, "grad_norm": 1.5743842026160257, "learning_rate": 8.912007219310452e-07, "loss": 0.6161, "step": 1570 }, { "epoch": 0.25346886092287835, "grad_norm": 1.6040252358979459, "learning_rate": 8.910345229504164e-07, "loss": 0.4453, "step": 1571 }, { "epoch": 0.2536302032913843, "grad_norm": 1.4620337631564377, "learning_rate": 8.908682126482403e-07, "loss": 0.7221, "step": 1572 }, { "epoch": 0.25379154565989026, "grad_norm": 1.618422032422482, "learning_rate": 8.907017910718631e-07, "loss": 0.7387, "step": 1573 }, { "epoch": 0.2539528880283963, "grad_norm": 1.5127518152627313, "learning_rate": 8.905352582686622e-07, "loss": 0.5798, "step": 1574 }, { "epoch": 0.25411423039690223, "grad_norm": 2.0295232622684134, "learning_rate": 8.903686142860471e-07, "loss": 0.544, "step": 1575 }, { "epoch": 0.2542755727654082, "grad_norm": 2.2282834546430217, "learning_rate": 8.902018591714591e-07, "loss": 0.8578, "step": 1576 }, { "epoch": 0.25443691513391414, "grad_norm": 2.2327914728839002, "learning_rate": 8.900349929723703e-07, "loss": 0.9168, "step": 1577 }, { "epoch": 0.25459825750242016, "grad_norm": 1.7589423723350224, "learning_rate": 8.898680157362853e-07, "loss": 0.6534, "step": 1578 }, { "epoch": 0.2547595998709261, "grad_norm": 1.482687703901609, "learning_rate": 8.897009275107399e-07, "loss": 0.4867, "step": 1579 }, { "epoch": 0.25492094223943207, "grad_norm": 1.574211707466218, "learning_rate": 8.895337283433015e-07, "loss": 0.5243, "step": 1580 }, { "epoch": 0.255082284607938, "grad_norm": 1.3986792754973785, "learning_rate": 8.89366418281569e-07, "loss": 0.3755, "step": 1581 }, { "epoch": 0.25524362697644404, "grad_norm": 1.4701477055722465, "learning_rate": 8.891989973731733e-07, "loss": 0.6392, "step": 1582 }, { "epoch": 0.25540496934495, "grad_norm": 1.4258494739670486, "learning_rate": 8.890314656657766e-07, "loss": 0.5639, "step": 1583 }, { "epoch": 0.25556631171345595, "grad_norm": 1.5270373727388236, "learning_rate": 8.888638232070723e-07, "loss": 0.5034, "step": 1584 }, { "epoch": 0.2557276540819619, "grad_norm": 1.926626828396674, "learning_rate": 8.886960700447859e-07, "loss": 0.5536, "step": 1585 }, { "epoch": 0.2558889964504679, "grad_norm": 1.5839313916847704, "learning_rate": 8.88528206226674e-07, "loss": 0.4733, "step": 1586 }, { "epoch": 0.2560503388189739, "grad_norm": 1.313572127825728, "learning_rate": 8.88360231800525e-07, "loss": 0.4442, "step": 1587 }, { "epoch": 0.25621168118747983, "grad_norm": 1.8400382487841185, "learning_rate": 8.881921468141587e-07, "loss": 0.5309, "step": 1588 }, { "epoch": 0.2563730235559858, "grad_norm": 1.767691559076007, "learning_rate": 8.880239513154259e-07, "loss": 0.5391, "step": 1589 }, { "epoch": 0.25653436592449175, "grad_norm": 1.825464970654075, "learning_rate": 8.878556453522099e-07, "loss": 0.5752, "step": 1590 }, { "epoch": 0.25669570829299776, "grad_norm": 1.8949702747197734, "learning_rate": 8.876872289724245e-07, "loss": 0.6819, "step": 1591 }, { "epoch": 0.2568570506615037, "grad_norm": 1.791360636753025, "learning_rate": 8.875187022240152e-07, "loss": 0.5535, "step": 1592 }, { "epoch": 0.2570183930300097, "grad_norm": 1.5865166269147029, "learning_rate": 8.87350065154959e-07, "loss": 0.5433, "step": 1593 }, { "epoch": 0.25717973539851563, "grad_norm": 1.9275721024659696, "learning_rate": 8.871813178132644e-07, "loss": 0.6777, "step": 1594 }, { "epoch": 0.25734107776702164, "grad_norm": 2.415749167425499, "learning_rate": 8.870124602469713e-07, "loss": 0.6547, "step": 1595 }, { "epoch": 0.2575024201355276, "grad_norm": 1.4868370117363214, "learning_rate": 8.868434925041505e-07, "loss": 0.3261, "step": 1596 }, { "epoch": 0.25766376250403356, "grad_norm": 2.0424055851344103, "learning_rate": 8.866744146329051e-07, "loss": 0.5682, "step": 1597 }, { "epoch": 0.2578251048725395, "grad_norm": 1.8065180226558548, "learning_rate": 8.865052266813685e-07, "loss": 0.5687, "step": 1598 }, { "epoch": 0.2579864472410455, "grad_norm": 1.4270369343512324, "learning_rate": 8.863359286977059e-07, "loss": 0.5431, "step": 1599 }, { "epoch": 0.2581477896095515, "grad_norm": 1.724522969726415, "learning_rate": 8.861665207301142e-07, "loss": 0.6101, "step": 1600 }, { "epoch": 0.25830913197805744, "grad_norm": 1.551188610008725, "learning_rate": 8.859970028268211e-07, "loss": 0.4217, "step": 1601 }, { "epoch": 0.2584704743465634, "grad_norm": 1.6152325299216714, "learning_rate": 8.858273750360857e-07, "loss": 0.6376, "step": 1602 }, { "epoch": 0.25863181671506935, "grad_norm": 1.2324725592539434, "learning_rate": 8.856576374061984e-07, "loss": 0.36, "step": 1603 }, { "epoch": 0.25879315908357536, "grad_norm": 1.7332339270526154, "learning_rate": 8.854877899854809e-07, "loss": 0.5457, "step": 1604 }, { "epoch": 0.2589545014520813, "grad_norm": 1.5380928354260006, "learning_rate": 8.853178328222864e-07, "loss": 0.4662, "step": 1605 }, { "epoch": 0.2591158438205873, "grad_norm": 1.5521107698955499, "learning_rate": 8.851477659649989e-07, "loss": 0.4324, "step": 1606 }, { "epoch": 0.25927718618909323, "grad_norm": 1.8187286952498025, "learning_rate": 8.849775894620337e-07, "loss": 0.4526, "step": 1607 }, { "epoch": 0.25943852855759925, "grad_norm": 2.102758358062653, "learning_rate": 8.848073033618378e-07, "loss": 0.5132, "step": 1608 }, { "epoch": 0.2595998709261052, "grad_norm": 1.530169670321139, "learning_rate": 8.846369077128888e-07, "loss": 0.6785, "step": 1609 }, { "epoch": 0.25976121329461116, "grad_norm": 1.929139588442676, "learning_rate": 8.844664025636958e-07, "loss": 0.6398, "step": 1610 }, { "epoch": 0.2599225556631171, "grad_norm": 1.670080059211176, "learning_rate": 8.842957879627991e-07, "loss": 0.6539, "step": 1611 }, { "epoch": 0.26008389803162313, "grad_norm": 1.4383596254924234, "learning_rate": 8.841250639587698e-07, "loss": 0.5791, "step": 1612 }, { "epoch": 0.2602452404001291, "grad_norm": 1.5581693525615907, "learning_rate": 8.839542306002108e-07, "loss": 0.6105, "step": 1613 }, { "epoch": 0.26040658276863504, "grad_norm": 1.784875693280419, "learning_rate": 8.837832879357554e-07, "loss": 0.6101, "step": 1614 }, { "epoch": 0.260567925137141, "grad_norm": 1.9627765679315967, "learning_rate": 8.836122360140684e-07, "loss": 0.5914, "step": 1615 }, { "epoch": 0.26072926750564696, "grad_norm": 1.6951215785936975, "learning_rate": 8.834410748838459e-07, "loss": 0.5741, "step": 1616 }, { "epoch": 0.26089060987415297, "grad_norm": 1.4486974719500185, "learning_rate": 8.832698045938147e-07, "loss": 0.4353, "step": 1617 }, { "epoch": 0.2610519522426589, "grad_norm": 1.4586054502650285, "learning_rate": 8.830984251927328e-07, "loss": 0.6449, "step": 1618 }, { "epoch": 0.2612132946111649, "grad_norm": 1.4719259135841607, "learning_rate": 8.829269367293894e-07, "loss": 0.5851, "step": 1619 }, { "epoch": 0.26137463697967084, "grad_norm": 2.317394438214886, "learning_rate": 8.827553392526046e-07, "loss": 0.6841, "step": 1620 }, { "epoch": 0.26153597934817685, "grad_norm": 2.0855550236747296, "learning_rate": 8.825836328112294e-07, "loss": 0.6611, "step": 1621 }, { "epoch": 0.2616973217166828, "grad_norm": 1.5047288106632826, "learning_rate": 8.824118174541462e-07, "loss": 0.5488, "step": 1622 }, { "epoch": 0.26185866408518876, "grad_norm": 2.7463629686945334, "learning_rate": 8.822398932302684e-07, "loss": 0.6741, "step": 1623 }, { "epoch": 0.2620200064536947, "grad_norm": 1.8049939048220363, "learning_rate": 8.820678601885397e-07, "loss": 0.5131, "step": 1624 }, { "epoch": 0.26218134882220073, "grad_norm": 1.5841269846358714, "learning_rate": 8.818957183779356e-07, "loss": 0.6333, "step": 1625 }, { "epoch": 0.2623426911907067, "grad_norm": 1.3835166819221225, "learning_rate": 8.817234678474621e-07, "loss": 0.5313, "step": 1626 }, { "epoch": 0.26250403355921265, "grad_norm": 1.6036634109919683, "learning_rate": 8.815511086461566e-07, "loss": 0.352, "step": 1627 }, { "epoch": 0.2626653759277186, "grad_norm": 1.6260004631783176, "learning_rate": 8.813786408230868e-07, "loss": 0.5107, "step": 1628 }, { "epoch": 0.2628267182962246, "grad_norm": 2.126332818716517, "learning_rate": 8.812060644273515e-07, "loss": 0.6949, "step": 1629 }, { "epoch": 0.26298806066473057, "grad_norm": 1.8058456733938015, "learning_rate": 8.810333795080811e-07, "loss": 0.522, "step": 1630 }, { "epoch": 0.26314940303323653, "grad_norm": 1.721347372144562, "learning_rate": 8.808605861144359e-07, "loss": 0.611, "step": 1631 }, { "epoch": 0.2633107454017425, "grad_norm": 1.9317079538119084, "learning_rate": 8.806876842956077e-07, "loss": 0.4962, "step": 1632 }, { "epoch": 0.26347208777024844, "grad_norm": 1.5020732062743967, "learning_rate": 8.805146741008191e-07, "loss": 0.3904, "step": 1633 }, { "epoch": 0.26363343013875445, "grad_norm": 2.0750760535584614, "learning_rate": 8.803415555793232e-07, "loss": 0.7883, "step": 1634 }, { "epoch": 0.2637947725072604, "grad_norm": 1.820627602740024, "learning_rate": 8.801683287804044e-07, "loss": 0.7433, "step": 1635 }, { "epoch": 0.26395611487576637, "grad_norm": 1.7852072677166917, "learning_rate": 8.799949937533778e-07, "loss": 0.6084, "step": 1636 }, { "epoch": 0.2641174572442723, "grad_norm": 1.6518693247368177, "learning_rate": 8.79821550547589e-07, "loss": 0.4855, "step": 1637 }, { "epoch": 0.26427879961277834, "grad_norm": 1.992044181904433, "learning_rate": 8.796479992124149e-07, "loss": 0.6769, "step": 1638 }, { "epoch": 0.2644401419812843, "grad_norm": 1.5776200194730938, "learning_rate": 8.794743397972623e-07, "loss": 0.5183, "step": 1639 }, { "epoch": 0.26460148434979025, "grad_norm": 1.4137308564310798, "learning_rate": 8.793005723515703e-07, "loss": 0.5729, "step": 1640 }, { "epoch": 0.2647628267182962, "grad_norm": 1.3933638700685964, "learning_rate": 8.79126696924807e-07, "loss": 0.4148, "step": 1641 }, { "epoch": 0.2649241690868022, "grad_norm": 1.8496755650569177, "learning_rate": 8.789527135664727e-07, "loss": 0.7138, "step": 1642 }, { "epoch": 0.2650855114553082, "grad_norm": 1.7419796122552467, "learning_rate": 8.787786223260973e-07, "loss": 0.6854, "step": 1643 }, { "epoch": 0.26524685382381413, "grad_norm": 1.8048799750664175, "learning_rate": 8.786044232532422e-07, "loss": 0.4986, "step": 1644 }, { "epoch": 0.2654081961923201, "grad_norm": 1.6799834944277525, "learning_rate": 8.784301163974992e-07, "loss": 0.3979, "step": 1645 }, { "epoch": 0.26556953856082605, "grad_norm": 1.391147729588393, "learning_rate": 8.782557018084908e-07, "loss": 0.3736, "step": 1646 }, { "epoch": 0.26573088092933206, "grad_norm": 1.2664304454452422, "learning_rate": 8.7808117953587e-07, "loss": 0.4689, "step": 1647 }, { "epoch": 0.265892223297838, "grad_norm": 1.962522557608838, "learning_rate": 8.779065496293207e-07, "loss": 0.5994, "step": 1648 }, { "epoch": 0.26605356566634397, "grad_norm": 1.1724660273105778, "learning_rate": 8.777318121385575e-07, "loss": 0.3677, "step": 1649 }, { "epoch": 0.26621490803484993, "grad_norm": 2.205995353721505, "learning_rate": 8.775569671133254e-07, "loss": 0.8699, "step": 1650 }, { "epoch": 0.26637625040335594, "grad_norm": 2.0786118474896687, "learning_rate": 8.773820146033998e-07, "loss": 0.5688, "step": 1651 }, { "epoch": 0.2665375927718619, "grad_norm": 1.650415255571568, "learning_rate": 8.772069546585877e-07, "loss": 0.3646, "step": 1652 }, { "epoch": 0.26669893514036785, "grad_norm": 1.531308854198199, "learning_rate": 8.770317873287254e-07, "loss": 0.4177, "step": 1653 }, { "epoch": 0.2668602775088738, "grad_norm": 1.6839156053376483, "learning_rate": 8.768565126636805e-07, "loss": 0.5941, "step": 1654 }, { "epoch": 0.2670216198773798, "grad_norm": 1.614468264837598, "learning_rate": 8.76681130713351e-07, "loss": 0.3619, "step": 1655 }, { "epoch": 0.2671829622458858, "grad_norm": 1.4952900373631206, "learning_rate": 8.765056415276657e-07, "loss": 0.5426, "step": 1656 }, { "epoch": 0.26734430461439174, "grad_norm": 1.9380125167765287, "learning_rate": 8.763300451565834e-07, "loss": 0.5316, "step": 1657 }, { "epoch": 0.2675056469828977, "grad_norm": 1.6780849174955146, "learning_rate": 8.761543416500938e-07, "loss": 0.6003, "step": 1658 }, { "epoch": 0.2676669893514037, "grad_norm": 2.017828040509923, "learning_rate": 8.759785310582171e-07, "loss": 0.6178, "step": 1659 }, { "epoch": 0.26782833171990966, "grad_norm": 1.5062658253705012, "learning_rate": 8.758026134310037e-07, "loss": 0.6214, "step": 1660 }, { "epoch": 0.2679896740884156, "grad_norm": 1.7383946756796618, "learning_rate": 8.756265888185348e-07, "loss": 0.5807, "step": 1661 }, { "epoch": 0.2681510164569216, "grad_norm": 1.57254719552687, "learning_rate": 8.754504572709218e-07, "loss": 0.5192, "step": 1662 }, { "epoch": 0.26831235882542753, "grad_norm": 1.7754159386302162, "learning_rate": 8.752742188383068e-07, "loss": 0.4942, "step": 1663 }, { "epoch": 0.26847370119393354, "grad_norm": 1.6436843684913038, "learning_rate": 8.750978735708621e-07, "loss": 0.4103, "step": 1664 }, { "epoch": 0.2686350435624395, "grad_norm": 2.06073014587812, "learning_rate": 8.749214215187905e-07, "loss": 0.5699, "step": 1665 }, { "epoch": 0.26879638593094546, "grad_norm": 1.2568624474397996, "learning_rate": 8.747448627323251e-07, "loss": 0.5434, "step": 1666 }, { "epoch": 0.2689577282994514, "grad_norm": 1.4570558765774468, "learning_rate": 8.745681972617296e-07, "loss": 0.5424, "step": 1667 }, { "epoch": 0.2691190706679574, "grad_norm": 1.7526674377296545, "learning_rate": 8.74391425157298e-07, "loss": 0.5817, "step": 1668 }, { "epoch": 0.2692804130364634, "grad_norm": 1.4113114715784605, "learning_rate": 8.742145464693547e-07, "loss": 0.5648, "step": 1669 }, { "epoch": 0.26944175540496934, "grad_norm": 1.9826501273307868, "learning_rate": 8.74037561248254e-07, "loss": 0.4159, "step": 1670 }, { "epoch": 0.2696030977734753, "grad_norm": 1.9177665526175873, "learning_rate": 8.738604695443812e-07, "loss": 0.6473, "step": 1671 }, { "epoch": 0.2697644401419813, "grad_norm": 1.9080033290442129, "learning_rate": 8.736832714081515e-07, "loss": 0.5764, "step": 1672 }, { "epoch": 0.26992578251048727, "grad_norm": 1.6651687168865599, "learning_rate": 8.735059668900105e-07, "loss": 0.5255, "step": 1673 }, { "epoch": 0.2700871248789932, "grad_norm": 1.4485237532101058, "learning_rate": 8.733285560404341e-07, "loss": 0.5118, "step": 1674 }, { "epoch": 0.2702484672474992, "grad_norm": 1.9186582238928576, "learning_rate": 8.731510389099283e-07, "loss": 0.6776, "step": 1675 }, { "epoch": 0.27040980961600514, "grad_norm": 1.6172772811072305, "learning_rate": 8.729734155490299e-07, "loss": 0.4474, "step": 1676 }, { "epoch": 0.27057115198451115, "grad_norm": 1.375036715970977, "learning_rate": 8.727956860083052e-07, "loss": 0.4768, "step": 1677 }, { "epoch": 0.2707324943530171, "grad_norm": 1.2060053641546338, "learning_rate": 8.726178503383512e-07, "loss": 0.394, "step": 1678 }, { "epoch": 0.27089383672152306, "grad_norm": 1.702349635022562, "learning_rate": 8.724399085897949e-07, "loss": 0.6368, "step": 1679 }, { "epoch": 0.271055179090029, "grad_norm": 1.3514065955765082, "learning_rate": 8.722618608132937e-07, "loss": 0.5571, "step": 1680 }, { "epoch": 0.27121652145853503, "grad_norm": 1.3526371999454283, "learning_rate": 8.720837070595352e-07, "loss": 0.3671, "step": 1681 }, { "epoch": 0.271377863827041, "grad_norm": 1.8339980899838284, "learning_rate": 8.719054473792368e-07, "loss": 0.566, "step": 1682 }, { "epoch": 0.27153920619554694, "grad_norm": 1.338175366027076, "learning_rate": 8.717270818231463e-07, "loss": 0.4244, "step": 1683 }, { "epoch": 0.2717005485640529, "grad_norm": 1.7547155607294769, "learning_rate": 8.71548610442042e-07, "loss": 0.5642, "step": 1684 }, { "epoch": 0.2718618909325589, "grad_norm": 1.5987471234949644, "learning_rate": 8.713700332867315e-07, "loss": 0.4673, "step": 1685 }, { "epoch": 0.27202323330106487, "grad_norm": 1.2537805131753328, "learning_rate": 8.711913504080533e-07, "loss": 0.6248, "step": 1686 }, { "epoch": 0.2721845756695708, "grad_norm": 1.5353092185338082, "learning_rate": 8.710125618568756e-07, "loss": 0.5473, "step": 1687 }, { "epoch": 0.2723459180380768, "grad_norm": 1.740757924454257, "learning_rate": 8.708336676840969e-07, "loss": 0.7717, "step": 1688 }, { "epoch": 0.2725072604065828, "grad_norm": 1.4545795994782347, "learning_rate": 8.706546679406452e-07, "loss": 0.3533, "step": 1689 }, { "epoch": 0.27266860277508875, "grad_norm": 1.528943718951563, "learning_rate": 8.704755626774795e-07, "loss": 0.5368, "step": 1690 }, { "epoch": 0.2728299451435947, "grad_norm": 1.4945533730863083, "learning_rate": 8.702963519455881e-07, "loss": 0.4324, "step": 1691 }, { "epoch": 0.27299128751210067, "grad_norm": 1.8445284218546325, "learning_rate": 8.701170357959895e-07, "loss": 0.5872, "step": 1692 }, { "epoch": 0.2731526298806066, "grad_norm": 1.5421566141701641, "learning_rate": 8.699376142797323e-07, "loss": 0.4912, "step": 1693 }, { "epoch": 0.27331397224911264, "grad_norm": 1.6130561579218559, "learning_rate": 8.697580874478952e-07, "loss": 0.4472, "step": 1694 }, { "epoch": 0.2734753146176186, "grad_norm": 1.336998189392804, "learning_rate": 8.695784553515865e-07, "loss": 0.4754, "step": 1695 }, { "epoch": 0.27363665698612455, "grad_norm": 2.1575152237267745, "learning_rate": 8.69398718041945e-07, "loss": 0.6549, "step": 1696 }, { "epoch": 0.2737979993546305, "grad_norm": 1.484839035583867, "learning_rate": 8.692188755701389e-07, "loss": 0.4983, "step": 1697 }, { "epoch": 0.2739593417231365, "grad_norm": 2.0149687652438804, "learning_rate": 8.690389279873666e-07, "loss": 0.5963, "step": 1698 }, { "epoch": 0.2741206840916425, "grad_norm": 1.5223640768328155, "learning_rate": 8.688588753448567e-07, "loss": 0.5669, "step": 1699 }, { "epoch": 0.27428202646014843, "grad_norm": 2.06992656061492, "learning_rate": 8.686787176938673e-07, "loss": 0.4619, "step": 1700 }, { "epoch": 0.2744433688286544, "grad_norm": 1.474729257725193, "learning_rate": 8.684984550856864e-07, "loss": 0.6323, "step": 1701 }, { "epoch": 0.2746047111971604, "grad_norm": 1.9783775831232062, "learning_rate": 8.683180875716321e-07, "loss": 0.6551, "step": 1702 }, { "epoch": 0.27476605356566636, "grad_norm": 1.7843087686783423, "learning_rate": 8.681376152030524e-07, "loss": 0.5851, "step": 1703 }, { "epoch": 0.2749273959341723, "grad_norm": 1.7413467636661477, "learning_rate": 8.679570380313247e-07, "loss": 0.5553, "step": 1704 }, { "epoch": 0.27508873830267827, "grad_norm": 1.4928526669042086, "learning_rate": 8.677763561078568e-07, "loss": 0.5678, "step": 1705 }, { "epoch": 0.2752500806711842, "grad_norm": 1.2329993489550677, "learning_rate": 8.675955694840862e-07, "loss": 0.618, "step": 1706 }, { "epoch": 0.27541142303969024, "grad_norm": 1.7647038310169938, "learning_rate": 8.674146782114797e-07, "loss": 0.5152, "step": 1707 }, { "epoch": 0.2755727654081962, "grad_norm": 1.8564591826152033, "learning_rate": 8.672336823415346e-07, "loss": 0.55, "step": 1708 }, { "epoch": 0.27573410777670215, "grad_norm": 2.2656257366310766, "learning_rate": 8.670525819257775e-07, "loss": 0.6537, "step": 1709 }, { "epoch": 0.2758954501452081, "grad_norm": 2.2729736064581827, "learning_rate": 8.66871377015765e-07, "loss": 0.7205, "step": 1710 }, { "epoch": 0.2760567925137141, "grad_norm": 1.8612570333764256, "learning_rate": 8.666900676630835e-07, "loss": 0.5791, "step": 1711 }, { "epoch": 0.2762181348822201, "grad_norm": 1.4154791716859294, "learning_rate": 8.665086539193487e-07, "loss": 0.5424, "step": 1712 }, { "epoch": 0.27637947725072604, "grad_norm": 1.672032999434009, "learning_rate": 8.663271358362063e-07, "loss": 0.4709, "step": 1713 }, { "epoch": 0.276540819619232, "grad_norm": 1.520016864758342, "learning_rate": 8.661455134653321e-07, "loss": 0.4096, "step": 1714 }, { "epoch": 0.276702161987738, "grad_norm": 1.7794967187399435, "learning_rate": 8.659637868584308e-07, "loss": 0.4826, "step": 1715 }, { "epoch": 0.27686350435624396, "grad_norm": 1.4093626119064975, "learning_rate": 8.657819560672377e-07, "loss": 0.5896, "step": 1716 }, { "epoch": 0.2770248467247499, "grad_norm": 1.9608427837947666, "learning_rate": 8.656000211435166e-07, "loss": 0.5516, "step": 1717 }, { "epoch": 0.2771861890932559, "grad_norm": 1.928879850692835, "learning_rate": 8.654179821390621e-07, "loss": 0.5422, "step": 1718 }, { "epoch": 0.27734753146176183, "grad_norm": 1.5337670327929755, "learning_rate": 8.652358391056975e-07, "loss": 0.6547, "step": 1719 }, { "epoch": 0.27750887383026784, "grad_norm": 1.812261434179296, "learning_rate": 8.650535920952766e-07, "loss": 0.4856, "step": 1720 }, { "epoch": 0.2776702161987738, "grad_norm": 1.3835829835081876, "learning_rate": 8.64871241159682e-07, "loss": 0.4706, "step": 1721 }, { "epoch": 0.27783155856727976, "grad_norm": 1.7861009764923508, "learning_rate": 8.646887863508264e-07, "loss": 0.5831, "step": 1722 }, { "epoch": 0.2779929009357857, "grad_norm": 1.4848780532498522, "learning_rate": 8.645062277206516e-07, "loss": 0.653, "step": 1723 }, { "epoch": 0.2781542433042917, "grad_norm": 1.114808116362077, "learning_rate": 8.643235653211296e-07, "loss": 0.3753, "step": 1724 }, { "epoch": 0.2783155856727977, "grad_norm": 2.0956683124875357, "learning_rate": 8.641407992042616e-07, "loss": 0.608, "step": 1725 }, { "epoch": 0.27847692804130364, "grad_norm": 1.9340084979749335, "learning_rate": 8.639579294220778e-07, "loss": 0.531, "step": 1726 }, { "epoch": 0.2786382704098096, "grad_norm": 1.2587386802611362, "learning_rate": 8.637749560266391e-07, "loss": 0.5804, "step": 1727 }, { "epoch": 0.2787996127783156, "grad_norm": 2.210528763910721, "learning_rate": 8.635918790700349e-07, "loss": 0.6909, "step": 1728 }, { "epoch": 0.27896095514682157, "grad_norm": 1.4748921306412845, "learning_rate": 8.634086986043842e-07, "loss": 0.4907, "step": 1729 }, { "epoch": 0.2791222975153275, "grad_norm": 2.106428787385922, "learning_rate": 8.632254146818363e-07, "loss": 0.581, "step": 1730 }, { "epoch": 0.2792836398838335, "grad_norm": 1.6810935589353861, "learning_rate": 8.630420273545687e-07, "loss": 0.4331, "step": 1731 }, { "epoch": 0.2794449822523395, "grad_norm": 1.6348832428799713, "learning_rate": 8.628585366747893e-07, "loss": 0.4147, "step": 1732 }, { "epoch": 0.27960632462084545, "grad_norm": 1.903342985250092, "learning_rate": 8.626749426947348e-07, "loss": 0.711, "step": 1733 }, { "epoch": 0.2797676669893514, "grad_norm": 1.469764683631446, "learning_rate": 8.624912454666722e-07, "loss": 0.4527, "step": 1734 }, { "epoch": 0.27992900935785736, "grad_norm": 1.4089362449029526, "learning_rate": 8.623074450428964e-07, "loss": 0.5568, "step": 1735 }, { "epoch": 0.2800903517263633, "grad_norm": 1.5460033079953353, "learning_rate": 8.621235414757335e-07, "loss": 0.5763, "step": 1736 }, { "epoch": 0.28025169409486933, "grad_norm": 1.8777791725181099, "learning_rate": 8.619395348175374e-07, "loss": 0.4298, "step": 1737 }, { "epoch": 0.2804130364633753, "grad_norm": 1.4345726392297333, "learning_rate": 8.617554251206921e-07, "loss": 0.5598, "step": 1738 }, { "epoch": 0.28057437883188124, "grad_norm": 1.403265647380939, "learning_rate": 8.615712124376109e-07, "loss": 0.5181, "step": 1739 }, { "epoch": 0.2807357212003872, "grad_norm": 1.7786567952017014, "learning_rate": 8.613868968207365e-07, "loss": 0.4744, "step": 1740 }, { "epoch": 0.2808970635688932, "grad_norm": 1.2684493869131472, "learning_rate": 8.612024783225403e-07, "loss": 0.488, "step": 1741 }, { "epoch": 0.28105840593739917, "grad_norm": 1.844035175058333, "learning_rate": 8.610179569955239e-07, "loss": 0.584, "step": 1742 }, { "epoch": 0.2812197483059051, "grad_norm": 1.5340345327377203, "learning_rate": 8.608333328922173e-07, "loss": 0.5269, "step": 1743 }, { "epoch": 0.2813810906744111, "grad_norm": 1.930237170554871, "learning_rate": 8.606486060651806e-07, "loss": 0.5909, "step": 1744 }, { "epoch": 0.2815424330429171, "grad_norm": 1.699049246215204, "learning_rate": 8.604637765670022e-07, "loss": 0.7027, "step": 1745 }, { "epoch": 0.28170377541142305, "grad_norm": 2.289917561575581, "learning_rate": 8.602788444503005e-07, "loss": 0.7409, "step": 1746 }, { "epoch": 0.281865117779929, "grad_norm": 1.7295462256874057, "learning_rate": 8.600938097677228e-07, "loss": 0.6313, "step": 1747 }, { "epoch": 0.28202646014843497, "grad_norm": 1.4581071496582971, "learning_rate": 8.599086725719458e-07, "loss": 0.4497, "step": 1748 }, { "epoch": 0.2821878025169409, "grad_norm": 1.6551408472661595, "learning_rate": 8.59723432915675e-07, "loss": 0.5019, "step": 1749 }, { "epoch": 0.28234914488544693, "grad_norm": 1.5506649406114081, "learning_rate": 8.595380908516453e-07, "loss": 0.5736, "step": 1750 }, { "epoch": 0.2825104872539529, "grad_norm": 2.691171400567542, "learning_rate": 8.59352646432621e-07, "loss": 0.645, "step": 1751 }, { "epoch": 0.28267182962245885, "grad_norm": 1.6295872277987333, "learning_rate": 8.59167099711395e-07, "loss": 0.3845, "step": 1752 }, { "epoch": 0.2828331719909648, "grad_norm": 1.5886026172722516, "learning_rate": 8.589814507407897e-07, "loss": 0.7011, "step": 1753 }, { "epoch": 0.2829945143594708, "grad_norm": 1.4857752570018279, "learning_rate": 8.587956995736567e-07, "loss": 0.6112, "step": 1754 }, { "epoch": 0.2831558567279768, "grad_norm": 1.6886908956105522, "learning_rate": 8.586098462628763e-07, "loss": 0.6987, "step": 1755 }, { "epoch": 0.28331719909648273, "grad_norm": 1.8420850704276674, "learning_rate": 8.584238908613581e-07, "loss": 0.7276, "step": 1756 }, { "epoch": 0.2834785414649887, "grad_norm": 1.9369606374818706, "learning_rate": 8.582378334220411e-07, "loss": 0.6325, "step": 1757 }, { "epoch": 0.2836398838334947, "grad_norm": 1.7763925543216963, "learning_rate": 8.580516739978925e-07, "loss": 0.587, "step": 1758 }, { "epoch": 0.28380122620200066, "grad_norm": 1.7897232368089149, "learning_rate": 8.578654126419093e-07, "loss": 0.5312, "step": 1759 }, { "epoch": 0.2839625685705066, "grad_norm": 1.6997421686211172, "learning_rate": 8.576790494071174e-07, "loss": 0.616, "step": 1760 }, { "epoch": 0.28412391093901257, "grad_norm": 1.9725182815497486, "learning_rate": 8.574925843465711e-07, "loss": 0.6362, "step": 1761 }, { "epoch": 0.2842852533075186, "grad_norm": 2.2972622499536826, "learning_rate": 8.573060175133546e-07, "loss": 0.6583, "step": 1762 }, { "epoch": 0.28444659567602454, "grad_norm": 1.237392889069046, "learning_rate": 8.571193489605805e-07, "loss": 0.4855, "step": 1763 }, { "epoch": 0.2846079380445305, "grad_norm": 2.131313593746283, "learning_rate": 8.569325787413904e-07, "loss": 0.5977, "step": 1764 }, { "epoch": 0.28476928041303645, "grad_norm": 1.810633717977797, "learning_rate": 8.56745706908955e-07, "loss": 0.5672, "step": 1765 }, { "epoch": 0.2849306227815424, "grad_norm": 1.9935417210002229, "learning_rate": 8.565587335164739e-07, "loss": 0.645, "step": 1766 }, { "epoch": 0.2850919651500484, "grad_norm": 1.5499255223992994, "learning_rate": 8.563716586171754e-07, "loss": 0.4191, "step": 1767 }, { "epoch": 0.2852533075185544, "grad_norm": 1.5040786446188936, "learning_rate": 8.561844822643169e-07, "loss": 0.5829, "step": 1768 }, { "epoch": 0.28541464988706033, "grad_norm": 1.7384626315349105, "learning_rate": 8.55997204511185e-07, "loss": 0.417, "step": 1769 }, { "epoch": 0.2855759922555663, "grad_norm": 1.9601665668360486, "learning_rate": 8.558098254110942e-07, "loss": 0.4946, "step": 1770 }, { "epoch": 0.2857373346240723, "grad_norm": 1.8961308284320133, "learning_rate": 8.556223450173888e-07, "loss": 0.5799, "step": 1771 }, { "epoch": 0.28589867699257826, "grad_norm": 2.444483314792729, "learning_rate": 8.554347633834416e-07, "loss": 0.757, "step": 1772 }, { "epoch": 0.2860600193610842, "grad_norm": 1.8020973940026974, "learning_rate": 8.552470805626544e-07, "loss": 0.6374, "step": 1773 }, { "epoch": 0.2862213617295902, "grad_norm": 1.808083513761689, "learning_rate": 8.550592966084572e-07, "loss": 0.6095, "step": 1774 }, { "epoch": 0.2863827040980962, "grad_norm": 1.8246995835411293, "learning_rate": 8.548714115743094e-07, "loss": 0.6753, "step": 1775 }, { "epoch": 0.28654404646660214, "grad_norm": 1.560383240257287, "learning_rate": 8.546834255136992e-07, "loss": 0.5322, "step": 1776 }, { "epoch": 0.2867053888351081, "grad_norm": 1.9230323966080058, "learning_rate": 8.544953384801432e-07, "loss": 0.5386, "step": 1777 }, { "epoch": 0.28686673120361406, "grad_norm": 1.286842283747333, "learning_rate": 8.543071505271869e-07, "loss": 0.5763, "step": 1778 }, { "epoch": 0.28702807357212, "grad_norm": 1.7878347123487672, "learning_rate": 8.541188617084045e-07, "loss": 0.5159, "step": 1779 }, { "epoch": 0.287189415940626, "grad_norm": 1.5755119975028793, "learning_rate": 8.53930472077399e-07, "loss": 0.4078, "step": 1780 }, { "epoch": 0.287350758309132, "grad_norm": 1.7718796798940277, "learning_rate": 8.53741981687802e-07, "loss": 0.6088, "step": 1781 }, { "epoch": 0.28751210067763794, "grad_norm": 2.4839850545547213, "learning_rate": 8.535533905932737e-07, "loss": 0.7373, "step": 1782 }, { "epoch": 0.2876734430461439, "grad_norm": 1.4054450804417362, "learning_rate": 8.533646988475034e-07, "loss": 0.5391, "step": 1783 }, { "epoch": 0.2878347854146499, "grad_norm": 1.3816118361925276, "learning_rate": 8.531759065042085e-07, "loss": 0.6933, "step": 1784 }, { "epoch": 0.28799612778315586, "grad_norm": 1.153374524501836, "learning_rate": 8.529870136171355e-07, "loss": 0.5264, "step": 1785 }, { "epoch": 0.2881574701516618, "grad_norm": 1.807372898471264, "learning_rate": 8.527980202400593e-07, "loss": 0.5251, "step": 1786 }, { "epoch": 0.2883188125201678, "grad_norm": 1.7471530781689675, "learning_rate": 8.526089264267832e-07, "loss": 0.546, "step": 1787 }, { "epoch": 0.2884801548886738, "grad_norm": 1.5403073306392607, "learning_rate": 8.524197322311395e-07, "loss": 0.437, "step": 1788 }, { "epoch": 0.28864149725717975, "grad_norm": 1.6622055510006775, "learning_rate": 8.522304377069889e-07, "loss": 0.6218, "step": 1789 }, { "epoch": 0.2888028396256857, "grad_norm": 2.1010518073948528, "learning_rate": 8.520410429082206e-07, "loss": 0.6641, "step": 1790 }, { "epoch": 0.28896418199419166, "grad_norm": 1.6629663716745184, "learning_rate": 8.518515478887522e-07, "loss": 0.4128, "step": 1791 }, { "epoch": 0.2891255243626976, "grad_norm": 2.6652311295701057, "learning_rate": 8.516619527025305e-07, "loss": 0.6985, "step": 1792 }, { "epoch": 0.28928686673120363, "grad_norm": 1.6706366542947684, "learning_rate": 8.514722574035301e-07, "loss": 0.6078, "step": 1793 }, { "epoch": 0.2894482090997096, "grad_norm": 1.8284929997365178, "learning_rate": 8.512824620457542e-07, "loss": 0.7339, "step": 1794 }, { "epoch": 0.28960955146821554, "grad_norm": 1.9070090361742633, "learning_rate": 8.510925666832347e-07, "loss": 0.5658, "step": 1795 }, { "epoch": 0.2897708938367215, "grad_norm": 1.7198386472603975, "learning_rate": 8.50902571370032e-07, "loss": 0.6603, "step": 1796 }, { "epoch": 0.2899322362052275, "grad_norm": 1.3902159099847826, "learning_rate": 8.507124761602349e-07, "loss": 0.4748, "step": 1797 }, { "epoch": 0.29009357857373347, "grad_norm": 1.2910958638843901, "learning_rate": 8.505222811079607e-07, "loss": 0.4882, "step": 1798 }, { "epoch": 0.2902549209422394, "grad_norm": 1.309992119932862, "learning_rate": 8.503319862673545e-07, "loss": 0.4424, "step": 1799 }, { "epoch": 0.2904162633107454, "grad_norm": 1.7320858394019514, "learning_rate": 8.501415916925909e-07, "loss": 0.3987, "step": 1800 }, { "epoch": 0.2905776056792514, "grad_norm": 3.058653303519039, "learning_rate": 8.499510974378722e-07, "loss": 0.5231, "step": 1801 }, { "epoch": 0.29073894804775735, "grad_norm": 1.5316401393337984, "learning_rate": 8.49760503557429e-07, "loss": 0.6563, "step": 1802 }, { "epoch": 0.2909002904162633, "grad_norm": 1.7135932551293083, "learning_rate": 8.495698101055206e-07, "loss": 0.4147, "step": 1803 }, { "epoch": 0.29106163278476926, "grad_norm": 1.7326485916644077, "learning_rate": 8.493790171364343e-07, "loss": 0.7551, "step": 1804 }, { "epoch": 0.2912229751532753, "grad_norm": 1.5522739711488909, "learning_rate": 8.491881247044864e-07, "loss": 0.4037, "step": 1805 }, { "epoch": 0.29138431752178123, "grad_norm": 1.8914820722455883, "learning_rate": 8.489971328640206e-07, "loss": 0.6994, "step": 1806 }, { "epoch": 0.2915456598902872, "grad_norm": 1.6464895155567365, "learning_rate": 8.488060416694097e-07, "loss": 0.476, "step": 1807 }, { "epoch": 0.29170700225879315, "grad_norm": 1.690393580817452, "learning_rate": 8.486148511750542e-07, "loss": 0.3981, "step": 1808 }, { "epoch": 0.2918683446272991, "grad_norm": 1.8288317887222543, "learning_rate": 8.484235614353831e-07, "loss": 0.6482, "step": 1809 }, { "epoch": 0.2920296869958051, "grad_norm": 1.959885333949937, "learning_rate": 8.482321725048538e-07, "loss": 0.676, "step": 1810 }, { "epoch": 0.2921910293643111, "grad_norm": 2.1401667417647983, "learning_rate": 8.480406844379518e-07, "loss": 0.5791, "step": 1811 }, { "epoch": 0.29235237173281703, "grad_norm": 1.3837082111695884, "learning_rate": 8.478490972891907e-07, "loss": 0.5258, "step": 1812 }, { "epoch": 0.292513714101323, "grad_norm": 2.0816853935594564, "learning_rate": 8.476574111131125e-07, "loss": 0.6604, "step": 1813 }, { "epoch": 0.292675056469829, "grad_norm": 1.296711325370805, "learning_rate": 8.474656259642873e-07, "loss": 0.5294, "step": 1814 }, { "epoch": 0.29283639883833495, "grad_norm": 2.084452925888263, "learning_rate": 8.472737418973135e-07, "loss": 0.6701, "step": 1815 }, { "epoch": 0.2929977412068409, "grad_norm": 1.419638247281085, "learning_rate": 8.470817589668173e-07, "loss": 0.6917, "step": 1816 }, { "epoch": 0.29315908357534687, "grad_norm": 1.9779896766694063, "learning_rate": 8.468896772274537e-07, "loss": 0.5138, "step": 1817 }, { "epoch": 0.2933204259438529, "grad_norm": 1.2770134268333369, "learning_rate": 8.466974967339051e-07, "loss": 0.3687, "step": 1818 }, { "epoch": 0.29348176831235884, "grad_norm": 1.7291675782105522, "learning_rate": 8.465052175408826e-07, "loss": 0.4492, "step": 1819 }, { "epoch": 0.2936431106808648, "grad_norm": 2.138425713573119, "learning_rate": 8.463128397031248e-07, "loss": 0.571, "step": 1820 }, { "epoch": 0.29380445304937075, "grad_norm": 1.844739292922756, "learning_rate": 8.46120363275399e-07, "loss": 0.468, "step": 1821 }, { "epoch": 0.2939657954178767, "grad_norm": 1.57228412498821, "learning_rate": 8.459277883125003e-07, "loss": 0.6798, "step": 1822 }, { "epoch": 0.2941271377863827, "grad_norm": 1.302565991273031, "learning_rate": 8.457351148692518e-07, "loss": 0.5052, "step": 1823 }, { "epoch": 0.2942884801548887, "grad_norm": 2.114373959353036, "learning_rate": 8.455423430005047e-07, "loss": 0.6235, "step": 1824 }, { "epoch": 0.29444982252339463, "grad_norm": 1.3147485636246112, "learning_rate": 8.45349472761138e-07, "loss": 0.6024, "step": 1825 }, { "epoch": 0.2946111648919006, "grad_norm": 1.6185529169123822, "learning_rate": 8.451565042060591e-07, "loss": 0.5937, "step": 1826 }, { "epoch": 0.2947725072604066, "grad_norm": 2.045207152192569, "learning_rate": 8.449634373902033e-07, "loss": 0.5686, "step": 1827 }, { "epoch": 0.29493384962891256, "grad_norm": 1.6467310312307046, "learning_rate": 8.447702723685334e-07, "loss": 0.4972, "step": 1828 }, { "epoch": 0.2950951919974185, "grad_norm": 2.049283074413974, "learning_rate": 8.445770091960409e-07, "loss": 0.486, "step": 1829 }, { "epoch": 0.2952565343659245, "grad_norm": 1.9300096374231566, "learning_rate": 8.443836479277447e-07, "loss": 0.6901, "step": 1830 }, { "epoch": 0.2954178767344305, "grad_norm": 1.76920286925271, "learning_rate": 8.441901886186918e-07, "loss": 0.6284, "step": 1831 }, { "epoch": 0.29557921910293644, "grad_norm": 1.3567144732444667, "learning_rate": 8.439966313239572e-07, "loss": 0.3943, "step": 1832 }, { "epoch": 0.2957405614714424, "grad_norm": 1.4051801638634613, "learning_rate": 8.438029760986436e-07, "loss": 0.4848, "step": 1833 }, { "epoch": 0.29590190383994835, "grad_norm": 2.183736670223576, "learning_rate": 8.436092229978817e-07, "loss": 0.8192, "step": 1834 }, { "epoch": 0.29606324620845437, "grad_norm": 2.1018204654784394, "learning_rate": 8.434153720768302e-07, "loss": 0.6524, "step": 1835 }, { "epoch": 0.2962245885769603, "grad_norm": 1.572775508112864, "learning_rate": 8.432214233906751e-07, "loss": 0.4999, "step": 1836 }, { "epoch": 0.2963859309454663, "grad_norm": 1.410970605945526, "learning_rate": 8.43027376994631e-07, "loss": 0.4907, "step": 1837 }, { "epoch": 0.29654727331397224, "grad_norm": 1.9247323481776841, "learning_rate": 8.428332329439398e-07, "loss": 0.5591, "step": 1838 }, { "epoch": 0.2967086156824782, "grad_norm": 1.6820203328077332, "learning_rate": 8.426389912938714e-07, "loss": 0.4949, "step": 1839 }, { "epoch": 0.2968699580509842, "grad_norm": 2.418439138382099, "learning_rate": 8.424446520997234e-07, "loss": 0.5674, "step": 1840 }, { "epoch": 0.29703130041949016, "grad_norm": 1.76105454605082, "learning_rate": 8.422502154168211e-07, "loss": 0.5264, "step": 1841 }, { "epoch": 0.2971926427879961, "grad_norm": 1.391270862570301, "learning_rate": 8.420556813005178e-07, "loss": 0.4095, "step": 1842 }, { "epoch": 0.2973539851565021, "grad_norm": 1.5310090323044083, "learning_rate": 8.418610498061942e-07, "loss": 0.67, "step": 1843 }, { "epoch": 0.2975153275250081, "grad_norm": 1.4090788468770328, "learning_rate": 8.41666320989259e-07, "loss": 0.4063, "step": 1844 }, { "epoch": 0.29767666989351405, "grad_norm": 1.8011388857982187, "learning_rate": 8.414714949051487e-07, "loss": 0.6769, "step": 1845 }, { "epoch": 0.29783801226202, "grad_norm": 1.4945089447223467, "learning_rate": 8.41276571609327e-07, "loss": 0.5841, "step": 1846 }, { "epoch": 0.29799935463052596, "grad_norm": 1.9581130154094075, "learning_rate": 8.410815511572858e-07, "loss": 0.4476, "step": 1847 }, { "epoch": 0.29816069699903197, "grad_norm": 1.6814063215791892, "learning_rate": 8.408864336045443e-07, "loss": 0.5829, "step": 1848 }, { "epoch": 0.29832203936753793, "grad_norm": 1.4190651094151816, "learning_rate": 8.406912190066496e-07, "loss": 0.4861, "step": 1849 }, { "epoch": 0.2984833817360439, "grad_norm": 1.92056099236014, "learning_rate": 8.404959074191762e-07, "loss": 0.6589, "step": 1850 }, { "epoch": 0.29864472410454984, "grad_norm": 1.5088379688422786, "learning_rate": 8.403004988977264e-07, "loss": 0.5256, "step": 1851 }, { "epoch": 0.2988060664730558, "grad_norm": 1.5985668171081104, "learning_rate": 8.4010499349793e-07, "loss": 0.4248, "step": 1852 }, { "epoch": 0.2989674088415618, "grad_norm": 1.8541941551428265, "learning_rate": 8.399093912754446e-07, "loss": 0.5301, "step": 1853 }, { "epoch": 0.29912875121006777, "grad_norm": 1.3805536905698674, "learning_rate": 8.397136922859547e-07, "loss": 0.649, "step": 1854 }, { "epoch": 0.2992900935785737, "grad_norm": 2.0175627627531068, "learning_rate": 8.39517896585173e-07, "loss": 0.5738, "step": 1855 }, { "epoch": 0.2994514359470797, "grad_norm": 2.0459095082859777, "learning_rate": 8.393220042288398e-07, "loss": 0.4111, "step": 1856 }, { "epoch": 0.2996127783155857, "grad_norm": 1.7579120523024954, "learning_rate": 8.391260152727224e-07, "loss": 0.7743, "step": 1857 }, { "epoch": 0.29977412068409165, "grad_norm": 1.7057170916025561, "learning_rate": 8.38929929772616e-07, "loss": 0.5804, "step": 1858 }, { "epoch": 0.2999354630525976, "grad_norm": 1.1910591495376914, "learning_rate": 8.387337477843429e-07, "loss": 0.5423, "step": 1859 }, { "epoch": 0.30009680542110356, "grad_norm": 2.4462091429061013, "learning_rate": 8.385374693637533e-07, "loss": 0.9334, "step": 1860 }, { "epoch": 0.3002581477896096, "grad_norm": 1.5678546706259913, "learning_rate": 8.383410945667246e-07, "loss": 0.5773, "step": 1861 }, { "epoch": 0.30041949015811553, "grad_norm": 1.3399124350333975, "learning_rate": 8.381446234491618e-07, "loss": 0.6089, "step": 1862 }, { "epoch": 0.3005808325266215, "grad_norm": 1.6963286979933287, "learning_rate": 8.379480560669972e-07, "loss": 0.5245, "step": 1863 }, { "epoch": 0.30074217489512745, "grad_norm": 2.3266138894763664, "learning_rate": 8.377513924761905e-07, "loss": 0.6189, "step": 1864 }, { "epoch": 0.30090351726363346, "grad_norm": 1.72556901199166, "learning_rate": 8.375546327327287e-07, "loss": 0.632, "step": 1865 }, { "epoch": 0.3010648596321394, "grad_norm": 1.8499776117155788, "learning_rate": 8.373577768926265e-07, "loss": 0.7015, "step": 1866 }, { "epoch": 0.30122620200064537, "grad_norm": 2.1573779707928566, "learning_rate": 8.371608250119256e-07, "loss": 0.7744, "step": 1867 }, { "epoch": 0.30138754436915133, "grad_norm": 2.2384972592921346, "learning_rate": 8.369637771466951e-07, "loss": 0.7171, "step": 1868 }, { "epoch": 0.3015488867376573, "grad_norm": 2.4430513988307943, "learning_rate": 8.367666333530317e-07, "loss": 0.6344, "step": 1869 }, { "epoch": 0.3017102291061633, "grad_norm": 1.6261335967243749, "learning_rate": 8.365693936870592e-07, "loss": 0.443, "step": 1870 }, { "epoch": 0.30187157147466925, "grad_norm": 1.6784287105053879, "learning_rate": 8.363720582049287e-07, "loss": 0.5741, "step": 1871 }, { "epoch": 0.3020329138431752, "grad_norm": 1.5748071824983336, "learning_rate": 8.361746269628184e-07, "loss": 0.5021, "step": 1872 }, { "epoch": 0.30219425621168117, "grad_norm": 1.9111286332869801, "learning_rate": 8.359771000169345e-07, "loss": 0.5009, "step": 1873 }, { "epoch": 0.3023555985801872, "grad_norm": 1.779742489736352, "learning_rate": 8.357794774235092e-07, "loss": 0.5643, "step": 1874 }, { "epoch": 0.30251694094869314, "grad_norm": 1.8809957642981594, "learning_rate": 8.35581759238803e-07, "loss": 0.7259, "step": 1875 }, { "epoch": 0.3026782833171991, "grad_norm": 1.7520396063477857, "learning_rate": 8.353839455191032e-07, "loss": 0.5681, "step": 1876 }, { "epoch": 0.30283962568570505, "grad_norm": 1.8796164584033317, "learning_rate": 8.351860363207244e-07, "loss": 0.5599, "step": 1877 }, { "epoch": 0.30300096805421106, "grad_norm": 1.9179037358905222, "learning_rate": 8.349880317000082e-07, "loss": 0.5706, "step": 1878 }, { "epoch": 0.303162310422717, "grad_norm": 1.4643211958570532, "learning_rate": 8.347899317133235e-07, "loss": 0.4747, "step": 1879 }, { "epoch": 0.303323652791223, "grad_norm": 1.6847065303847784, "learning_rate": 8.345917364170664e-07, "loss": 0.5238, "step": 1880 }, { "epoch": 0.30348499515972893, "grad_norm": 1.7086960554781865, "learning_rate": 8.343934458676603e-07, "loss": 0.4647, "step": 1881 }, { "epoch": 0.3036463375282349, "grad_norm": 2.0753876289562885, "learning_rate": 8.341950601215549e-07, "loss": 0.7515, "step": 1882 }, { "epoch": 0.3038076798967409, "grad_norm": 2.4686704212352124, "learning_rate": 8.33996579235228e-07, "loss": 0.697, "step": 1883 }, { "epoch": 0.30396902226524686, "grad_norm": 1.2274362646986603, "learning_rate": 8.337980032651841e-07, "loss": 0.4515, "step": 1884 }, { "epoch": 0.3041303646337528, "grad_norm": 1.6636496734749977, "learning_rate": 8.335993322679546e-07, "loss": 0.6497, "step": 1885 }, { "epoch": 0.30429170700225877, "grad_norm": 1.9111427926732258, "learning_rate": 8.33400566300098e-07, "loss": 0.6247, "step": 1886 }, { "epoch": 0.3044530493707648, "grad_norm": 1.5294519884119206, "learning_rate": 8.332017054182003e-07, "loss": 0.5697, "step": 1887 }, { "epoch": 0.30461439173927074, "grad_norm": 1.3502775931177826, "learning_rate": 8.330027496788737e-07, "loss": 0.4821, "step": 1888 }, { "epoch": 0.3047757341077767, "grad_norm": 1.252165254186325, "learning_rate": 8.328036991387581e-07, "loss": 0.4185, "step": 1889 }, { "epoch": 0.30493707647628265, "grad_norm": 1.50374842224292, "learning_rate": 8.326045538545201e-07, "loss": 0.5892, "step": 1890 }, { "epoch": 0.30509841884478867, "grad_norm": 1.9975938031607294, "learning_rate": 8.324053138828535e-07, "loss": 0.507, "step": 1891 }, { "epoch": 0.3052597612132946, "grad_norm": 1.5839214571158544, "learning_rate": 8.322059792804785e-07, "loss": 0.6559, "step": 1892 }, { "epoch": 0.3054211035818006, "grad_norm": 1.4568831544940002, "learning_rate": 8.32006550104143e-07, "loss": 0.4552, "step": 1893 }, { "epoch": 0.30558244595030654, "grad_norm": 1.3893044877788934, "learning_rate": 8.318070264106211e-07, "loss": 0.3056, "step": 1894 }, { "epoch": 0.3057437883188125, "grad_norm": 1.995293443748628, "learning_rate": 8.316074082567143e-07, "loss": 0.5991, "step": 1895 }, { "epoch": 0.3059051306873185, "grad_norm": 2.1617247145404703, "learning_rate": 8.314076956992509e-07, "loss": 0.5675, "step": 1896 }, { "epoch": 0.30606647305582446, "grad_norm": 1.7070987085766232, "learning_rate": 8.312078887950859e-07, "loss": 0.5279, "step": 1897 }, { "epoch": 0.3062278154243304, "grad_norm": 1.7356204123568595, "learning_rate": 8.310079876011014e-07, "loss": 0.3528, "step": 1898 }, { "epoch": 0.3063891577928364, "grad_norm": 1.8337762326559524, "learning_rate": 8.308079921742059e-07, "loss": 0.544, "step": 1899 }, { "epoch": 0.3065505001613424, "grad_norm": 1.61749950108233, "learning_rate": 8.306079025713353e-07, "loss": 0.4824, "step": 1900 }, { "epoch": 0.30671184252984834, "grad_norm": 2.466035533614988, "learning_rate": 8.30407718849452e-07, "loss": 0.6953, "step": 1901 }, { "epoch": 0.3068731848983543, "grad_norm": 2.077927314706031, "learning_rate": 8.302074410655454e-07, "loss": 0.6602, "step": 1902 }, { "epoch": 0.30703452726686026, "grad_norm": 1.8209937134853658, "learning_rate": 8.300070692766312e-07, "loss": 0.6596, "step": 1903 }, { "epoch": 0.30719586963536627, "grad_norm": 1.6741276476376337, "learning_rate": 8.298066035397526e-07, "loss": 0.6225, "step": 1904 }, { "epoch": 0.3073572120038722, "grad_norm": 1.8588945425203098, "learning_rate": 8.296060439119787e-07, "loss": 0.5182, "step": 1905 }, { "epoch": 0.3075185543723782, "grad_norm": 1.1213154324392798, "learning_rate": 8.294053904504059e-07, "loss": 0.4738, "step": 1906 }, { "epoch": 0.30767989674088414, "grad_norm": 2.013457678161333, "learning_rate": 8.292046432121573e-07, "loss": 0.5109, "step": 1907 }, { "epoch": 0.30784123910939015, "grad_norm": 2.015368068199301, "learning_rate": 8.290038022543825e-07, "loss": 0.7264, "step": 1908 }, { "epoch": 0.3080025814778961, "grad_norm": 2.2827415293393396, "learning_rate": 8.288028676342578e-07, "loss": 0.7598, "step": 1909 }, { "epoch": 0.30816392384640207, "grad_norm": 2.1652644706394923, "learning_rate": 8.286018394089863e-07, "loss": 0.5725, "step": 1910 }, { "epoch": 0.308325266214908, "grad_norm": 1.2363057548825307, "learning_rate": 8.284007176357975e-07, "loss": 0.4819, "step": 1911 }, { "epoch": 0.308486608583414, "grad_norm": 1.6317905714211252, "learning_rate": 8.281995023719477e-07, "loss": 0.6124, "step": 1912 }, { "epoch": 0.30864795095192, "grad_norm": 1.736912769457414, "learning_rate": 8.2799819367472e-07, "loss": 0.6603, "step": 1913 }, { "epoch": 0.30880929332042595, "grad_norm": 1.9009465093617095, "learning_rate": 8.277967916014239e-07, "loss": 0.573, "step": 1914 }, { "epoch": 0.3089706356889319, "grad_norm": 2.085539590551698, "learning_rate": 8.275952962093953e-07, "loss": 0.4508, "step": 1915 }, { "epoch": 0.30913197805743786, "grad_norm": 2.1035377267510538, "learning_rate": 8.27393707555997e-07, "loss": 0.461, "step": 1916 }, { "epoch": 0.3092933204259439, "grad_norm": 1.7308169460240217, "learning_rate": 8.271920256986182e-07, "loss": 0.4836, "step": 1917 }, { "epoch": 0.30945466279444983, "grad_norm": 1.356748037702, "learning_rate": 8.269902506946746e-07, "loss": 0.6306, "step": 1918 }, { "epoch": 0.3096160051629558, "grad_norm": 2.1117104493830805, "learning_rate": 8.267883826016083e-07, "loss": 0.5711, "step": 1919 }, { "epoch": 0.30977734753146174, "grad_norm": 1.9341813244716894, "learning_rate": 8.265864214768883e-07, "loss": 0.7633, "step": 1920 }, { "epoch": 0.30993868989996776, "grad_norm": 1.6364074993757092, "learning_rate": 8.263843673780097e-07, "loss": 0.6732, "step": 1921 }, { "epoch": 0.3101000322684737, "grad_norm": 1.531510077986031, "learning_rate": 8.261822203624945e-07, "loss": 0.3802, "step": 1922 }, { "epoch": 0.31026137463697967, "grad_norm": 1.8292036216438055, "learning_rate": 8.259799804878904e-07, "loss": 0.6363, "step": 1923 }, { "epoch": 0.3104227170054856, "grad_norm": 1.8718956361884527, "learning_rate": 8.257776478117724e-07, "loss": 0.6241, "step": 1924 }, { "epoch": 0.3105840593739916, "grad_norm": 1.3185291322275188, "learning_rate": 8.255752223917412e-07, "loss": 0.503, "step": 1925 }, { "epoch": 0.3107454017424976, "grad_norm": 1.7946585211920896, "learning_rate": 8.253727042854244e-07, "loss": 0.724, "step": 1926 }, { "epoch": 0.31090674411100355, "grad_norm": 1.9995414685096058, "learning_rate": 8.251700935504758e-07, "loss": 0.6071, "step": 1927 }, { "epoch": 0.3110680864795095, "grad_norm": 1.571571718235557, "learning_rate": 8.249673902445756e-07, "loss": 0.4941, "step": 1928 }, { "epoch": 0.31122942884801547, "grad_norm": 1.2404400031769487, "learning_rate": 8.247645944254303e-07, "loss": 0.5257, "step": 1929 }, { "epoch": 0.3113907712165215, "grad_norm": 1.8295115242110582, "learning_rate": 8.245617061507726e-07, "loss": 0.5211, "step": 1930 }, { "epoch": 0.31155211358502743, "grad_norm": 1.8803281103078633, "learning_rate": 8.24358725478362e-07, "loss": 0.6134, "step": 1931 }, { "epoch": 0.3117134559535334, "grad_norm": 1.8857182916582171, "learning_rate": 8.241556524659837e-07, "loss": 0.3808, "step": 1932 }, { "epoch": 0.31187479832203935, "grad_norm": 1.5517461354972861, "learning_rate": 8.239524871714498e-07, "loss": 0.5096, "step": 1933 }, { "epoch": 0.31203614069054536, "grad_norm": 1.4114526511590562, "learning_rate": 8.23749229652598e-07, "loss": 0.5282, "step": 1934 }, { "epoch": 0.3121974830590513, "grad_norm": 3.0065289024995154, "learning_rate": 8.235458799672927e-07, "loss": 0.6539, "step": 1935 }, { "epoch": 0.3123588254275573, "grad_norm": 1.710355058583486, "learning_rate": 8.233424381734247e-07, "loss": 0.5267, "step": 1936 }, { "epoch": 0.31252016779606323, "grad_norm": 2.5337536521700867, "learning_rate": 8.231389043289105e-07, "loss": 0.5695, "step": 1937 }, { "epoch": 0.31268151016456924, "grad_norm": 1.4708477217679208, "learning_rate": 8.229352784916932e-07, "loss": 0.3969, "step": 1938 }, { "epoch": 0.3128428525330752, "grad_norm": 1.2826428634608593, "learning_rate": 8.227315607197419e-07, "loss": 0.528, "step": 1939 }, { "epoch": 0.31300419490158116, "grad_norm": 1.705662368359473, "learning_rate": 8.22527751071052e-07, "loss": 0.5853, "step": 1940 }, { "epoch": 0.3131655372700871, "grad_norm": 1.7955707626569597, "learning_rate": 8.223238496036448e-07, "loss": 0.385, "step": 1941 }, { "epoch": 0.31332687963859307, "grad_norm": 2.1997946600019502, "learning_rate": 8.221198563755681e-07, "loss": 0.6447, "step": 1942 }, { "epoch": 0.3134882220070991, "grad_norm": 2.176104263079769, "learning_rate": 8.219157714448956e-07, "loss": 0.5628, "step": 1943 }, { "epoch": 0.31364956437560504, "grad_norm": 1.3688367650650006, "learning_rate": 8.217115948697273e-07, "loss": 0.6721, "step": 1944 }, { "epoch": 0.313810906744111, "grad_norm": 1.816503903624973, "learning_rate": 8.21507326708189e-07, "loss": 0.5473, "step": 1945 }, { "epoch": 0.31397224911261695, "grad_norm": 1.580721239785477, "learning_rate": 8.213029670184326e-07, "loss": 0.4803, "step": 1946 }, { "epoch": 0.31413359148112296, "grad_norm": 1.5635398456411242, "learning_rate": 8.210985158586365e-07, "loss": 0.4814, "step": 1947 }, { "epoch": 0.3142949338496289, "grad_norm": 1.886384669831117, "learning_rate": 8.208939732870046e-07, "loss": 0.5119, "step": 1948 }, { "epoch": 0.3144562762181349, "grad_norm": 2.0647893972817064, "learning_rate": 8.206893393617671e-07, "loss": 0.5019, "step": 1949 }, { "epoch": 0.31461761858664083, "grad_norm": 1.9576728058561825, "learning_rate": 8.204846141411801e-07, "loss": 0.6411, "step": 1950 }, { "epoch": 0.31477896095514685, "grad_norm": 1.871113309180619, "learning_rate": 8.202797976835258e-07, "loss": 0.5896, "step": 1951 }, { "epoch": 0.3149403033236528, "grad_norm": 1.554773222893836, "learning_rate": 8.200748900471121e-07, "loss": 0.5, "step": 1952 }, { "epoch": 0.31510164569215876, "grad_norm": 1.3314064775656318, "learning_rate": 8.198698912902736e-07, "loss": 0.6077, "step": 1953 }, { "epoch": 0.3152629880606647, "grad_norm": 1.8512894312376658, "learning_rate": 8.196648014713697e-07, "loss": 0.636, "step": 1954 }, { "epoch": 0.3154243304291707, "grad_norm": 1.6706420059565874, "learning_rate": 8.194596206487865e-07, "loss": 0.5099, "step": 1955 }, { "epoch": 0.3155856727976767, "grad_norm": 1.5536847738752078, "learning_rate": 8.192543488809361e-07, "loss": 0.6472, "step": 1956 }, { "epoch": 0.31574701516618264, "grad_norm": 2.1128211234438212, "learning_rate": 8.190489862262562e-07, "loss": 0.6333, "step": 1957 }, { "epoch": 0.3159083575346886, "grad_norm": 1.5469532185825103, "learning_rate": 8.188435327432099e-07, "loss": 0.4979, "step": 1958 }, { "epoch": 0.31606969990319456, "grad_norm": 3.188525465025061, "learning_rate": 8.186379884902874e-07, "loss": 0.5178, "step": 1959 }, { "epoch": 0.31623104227170057, "grad_norm": 1.689063513488183, "learning_rate": 8.184323535260036e-07, "loss": 0.5689, "step": 1960 }, { "epoch": 0.3163923846402065, "grad_norm": 1.5637593344235243, "learning_rate": 8.182266279088995e-07, "loss": 0.4913, "step": 1961 }, { "epoch": 0.3165537270087125, "grad_norm": 1.8457983890552458, "learning_rate": 8.180208116975425e-07, "loss": 0.6016, "step": 1962 }, { "epoch": 0.31671506937721844, "grad_norm": 1.5900308934695262, "learning_rate": 8.178149049505247e-07, "loss": 0.6274, "step": 1963 }, { "epoch": 0.31687641174572445, "grad_norm": 1.4751206882046253, "learning_rate": 8.176089077264649e-07, "loss": 0.4976, "step": 1964 }, { "epoch": 0.3170377541142304, "grad_norm": 1.2718464091318695, "learning_rate": 8.174028200840077e-07, "loss": 0.5232, "step": 1965 }, { "epoch": 0.31719909648273636, "grad_norm": 1.2155867877043858, "learning_rate": 8.171966420818227e-07, "loss": 0.5054, "step": 1966 }, { "epoch": 0.3173604388512423, "grad_norm": 1.6148735851969938, "learning_rate": 8.169903737786057e-07, "loss": 0.4579, "step": 1967 }, { "epoch": 0.31752178121974833, "grad_norm": 1.8834292086388216, "learning_rate": 8.167840152330783e-07, "loss": 0.6189, "step": 1968 }, { "epoch": 0.3176831235882543, "grad_norm": 1.5239445551651174, "learning_rate": 8.165775665039871e-07, "loss": 0.4755, "step": 1969 }, { "epoch": 0.31784446595676025, "grad_norm": 2.3292075079087913, "learning_rate": 8.163710276501056e-07, "loss": 0.625, "step": 1970 }, { "epoch": 0.3180058083252662, "grad_norm": 1.504805971989805, "learning_rate": 8.161643987302318e-07, "loss": 0.3816, "step": 1971 }, { "epoch": 0.31816715069377216, "grad_norm": 1.4383024380939928, "learning_rate": 8.1595767980319e-07, "loss": 0.4912, "step": 1972 }, { "epoch": 0.3183284930622782, "grad_norm": 2.0865388496341772, "learning_rate": 8.157508709278297e-07, "loss": 0.788, "step": 1973 }, { "epoch": 0.31848983543078413, "grad_norm": 1.4657962398089932, "learning_rate": 8.155439721630264e-07, "loss": 0.4087, "step": 1974 }, { "epoch": 0.3186511777992901, "grad_norm": 1.410553894617092, "learning_rate": 8.153369835676809e-07, "loss": 0.3902, "step": 1975 }, { "epoch": 0.31881252016779604, "grad_norm": 1.560580562375954, "learning_rate": 8.151299052007199e-07, "loss": 0.5338, "step": 1976 }, { "epoch": 0.31897386253630206, "grad_norm": 1.3441719013844533, "learning_rate": 8.149227371210951e-07, "loss": 0.4309, "step": 1977 }, { "epoch": 0.319135204904808, "grad_norm": 1.8545036938286734, "learning_rate": 8.147154793877845e-07, "loss": 0.4135, "step": 1978 }, { "epoch": 0.31929654727331397, "grad_norm": 1.4680479279056013, "learning_rate": 8.145081320597908e-07, "loss": 0.3284, "step": 1979 }, { "epoch": 0.3194578896418199, "grad_norm": 1.4732201910519833, "learning_rate": 8.143006951961429e-07, "loss": 0.285, "step": 1980 }, { "epoch": 0.31961923201032594, "grad_norm": 1.9767128506497726, "learning_rate": 8.140931688558948e-07, "loss": 0.7183, "step": 1981 }, { "epoch": 0.3197805743788319, "grad_norm": 1.46371416408021, "learning_rate": 8.138855530981261e-07, "loss": 0.5214, "step": 1982 }, { "epoch": 0.31994191674733785, "grad_norm": 1.747280801169456, "learning_rate": 8.136778479819417e-07, "loss": 0.487, "step": 1983 }, { "epoch": 0.3201032591158438, "grad_norm": 1.9030819191151591, "learning_rate": 8.134700535664723e-07, "loss": 0.6787, "step": 1984 }, { "epoch": 0.32026460148434976, "grad_norm": 1.310364348070146, "learning_rate": 8.132621699108736e-07, "loss": 0.632, "step": 1985 }, { "epoch": 0.3204259438528558, "grad_norm": 1.9999029612841777, "learning_rate": 8.13054197074327e-07, "loss": 0.7103, "step": 1986 }, { "epoch": 0.32058728622136173, "grad_norm": 1.8860090038331399, "learning_rate": 8.128461351160392e-07, "loss": 0.6444, "step": 1987 }, { "epoch": 0.3207486285898677, "grad_norm": 1.6022558432746437, "learning_rate": 8.12637984095242e-07, "loss": 0.4268, "step": 1988 }, { "epoch": 0.32090997095837365, "grad_norm": 1.2907878080302344, "learning_rate": 8.124297440711932e-07, "loss": 0.4025, "step": 1989 }, { "epoch": 0.32107131332687966, "grad_norm": 1.7414001601648788, "learning_rate": 8.122214151031752e-07, "loss": 0.6115, "step": 1990 }, { "epoch": 0.3212326556953856, "grad_norm": 1.5801502489526547, "learning_rate": 8.120129972504966e-07, "loss": 0.5812, "step": 1991 }, { "epoch": 0.3213939980638916, "grad_norm": 1.6390711374118891, "learning_rate": 8.118044905724899e-07, "loss": 0.5152, "step": 1992 }, { "epoch": 0.32155534043239753, "grad_norm": 2.337927439566293, "learning_rate": 8.115958951285147e-07, "loss": 0.6593, "step": 1993 }, { "epoch": 0.32171668280090354, "grad_norm": 2.269719495205093, "learning_rate": 8.113872109779542e-07, "loss": 0.4993, "step": 1994 }, { "epoch": 0.3218780251694095, "grad_norm": 1.4999413478828507, "learning_rate": 8.11178438180218e-07, "loss": 0.3986, "step": 1995 }, { "epoch": 0.32203936753791546, "grad_norm": 1.6682064414126492, "learning_rate": 8.109695767947405e-07, "loss": 0.6527, "step": 1996 }, { "epoch": 0.3222007099064214, "grad_norm": 1.8777031804051494, "learning_rate": 8.107606268809812e-07, "loss": 0.6109, "step": 1997 }, { "epoch": 0.32236205227492737, "grad_norm": 1.2841536022481383, "learning_rate": 8.10551588498425e-07, "loss": 0.556, "step": 1998 }, { "epoch": 0.3225233946434334, "grad_norm": 1.4043264160444655, "learning_rate": 8.103424617065819e-07, "loss": 0.3216, "step": 1999 }, { "epoch": 0.32268473701193934, "grad_norm": 1.4253250019149963, "learning_rate": 8.101332465649873e-07, "loss": 0.4854, "step": 2000 }, { "epoch": 0.3228460793804453, "grad_norm": 1.6919438518695082, "learning_rate": 8.099239431332011e-07, "loss": 0.563, "step": 2001 }, { "epoch": 0.32300742174895125, "grad_norm": 1.4494537409769432, "learning_rate": 8.097145514708093e-07, "loss": 0.5751, "step": 2002 }, { "epoch": 0.32316876411745726, "grad_norm": 1.9157312086822273, "learning_rate": 8.095050716374221e-07, "loss": 0.6374, "step": 2003 }, { "epoch": 0.3233301064859632, "grad_norm": 1.939184379589634, "learning_rate": 8.092955036926754e-07, "loss": 0.4906, "step": 2004 }, { "epoch": 0.3234914488544692, "grad_norm": 4.553272478991823, "learning_rate": 8.0908584769623e-07, "loss": 0.6933, "step": 2005 }, { "epoch": 0.32365279122297513, "grad_norm": 1.803061078556681, "learning_rate": 8.088761037077718e-07, "loss": 0.5336, "step": 2006 }, { "epoch": 0.32381413359148115, "grad_norm": 1.4753726423852809, "learning_rate": 8.086662717870114e-07, "loss": 0.3554, "step": 2007 }, { "epoch": 0.3239754759599871, "grad_norm": 1.9334972627244054, "learning_rate": 8.084563519936851e-07, "loss": 0.5078, "step": 2008 }, { "epoch": 0.32413681832849306, "grad_norm": 1.7595710604498078, "learning_rate": 8.082463443875538e-07, "loss": 0.533, "step": 2009 }, { "epoch": 0.324298160696999, "grad_norm": 1.8705431420705179, "learning_rate": 8.080362490284032e-07, "loss": 0.4721, "step": 2010 }, { "epoch": 0.32445950306550503, "grad_norm": 1.5019501089554406, "learning_rate": 8.078260659760445e-07, "loss": 0.3865, "step": 2011 }, { "epoch": 0.324620845434011, "grad_norm": 1.5157944889274944, "learning_rate": 8.076157952903134e-07, "loss": 0.4968, "step": 2012 }, { "epoch": 0.32478218780251694, "grad_norm": 1.7265930173084254, "learning_rate": 8.07405437031071e-07, "loss": 0.3859, "step": 2013 }, { "epoch": 0.3249435301710229, "grad_norm": 1.5597428218665985, "learning_rate": 8.071949912582028e-07, "loss": 0.4589, "step": 2014 }, { "epoch": 0.32510487253952886, "grad_norm": 1.2437174749674331, "learning_rate": 8.069844580316196e-07, "loss": 0.3358, "step": 2015 }, { "epoch": 0.32526621490803487, "grad_norm": 1.7140389815519195, "learning_rate": 8.067738374112571e-07, "loss": 0.7611, "step": 2016 }, { "epoch": 0.3254275572765408, "grad_norm": 1.629673619270644, "learning_rate": 8.065631294570756e-07, "loss": 0.4167, "step": 2017 }, { "epoch": 0.3255888996450468, "grad_norm": 1.8270380424125963, "learning_rate": 8.063523342290606e-07, "loss": 0.7293, "step": 2018 }, { "epoch": 0.32575024201355274, "grad_norm": 1.8268916216161668, "learning_rate": 8.061414517872221e-07, "loss": 0.3748, "step": 2019 }, { "epoch": 0.32591158438205875, "grad_norm": 1.4291385785040593, "learning_rate": 8.059304821915952e-07, "loss": 0.6473, "step": 2020 }, { "epoch": 0.3260729267505647, "grad_norm": 1.4322040692910176, "learning_rate": 8.057194255022397e-07, "loss": 0.4564, "step": 2021 }, { "epoch": 0.32623426911907066, "grad_norm": 2.2450052559500087, "learning_rate": 8.055082817792403e-07, "loss": 0.6299, "step": 2022 }, { "epoch": 0.3263956114875766, "grad_norm": 1.6709839772769468, "learning_rate": 8.052970510827061e-07, "loss": 0.669, "step": 2023 }, { "epoch": 0.32655695385608263, "grad_norm": 1.7041387078464136, "learning_rate": 8.050857334727718e-07, "loss": 0.4783, "step": 2024 }, { "epoch": 0.3267182962245886, "grad_norm": 1.48889860229898, "learning_rate": 8.048743290095957e-07, "loss": 0.5432, "step": 2025 }, { "epoch": 0.32687963859309455, "grad_norm": 1.530353049889439, "learning_rate": 8.046628377533618e-07, "loss": 0.5418, "step": 2026 }, { "epoch": 0.3270409809616005, "grad_norm": 1.7221308661742587, "learning_rate": 8.04451259764278e-07, "loss": 0.6019, "step": 2027 }, { "epoch": 0.32720232333010646, "grad_norm": 1.6620968236572584, "learning_rate": 8.04239595102578e-07, "loss": 0.3222, "step": 2028 }, { "epoch": 0.32736366569861247, "grad_norm": 1.3645229423302376, "learning_rate": 8.040278438285189e-07, "loss": 0.4121, "step": 2029 }, { "epoch": 0.32752500806711843, "grad_norm": 1.2648323779569797, "learning_rate": 8.038160060023833e-07, "loss": 0.4577, "step": 2030 }, { "epoch": 0.3276863504356244, "grad_norm": 1.5417361200857462, "learning_rate": 8.03604081684478e-07, "loss": 0.4491, "step": 2031 }, { "epoch": 0.32784769280413034, "grad_norm": 2.133695314910003, "learning_rate": 8.033920709351349e-07, "loss": 0.7619, "step": 2032 }, { "epoch": 0.32800903517263635, "grad_norm": 2.0897167702004458, "learning_rate": 8.031799738147099e-07, "loss": 0.682, "step": 2033 }, { "epoch": 0.3281703775411423, "grad_norm": 1.408655482847305, "learning_rate": 8.029677903835841e-07, "loss": 0.4805, "step": 2034 }, { "epoch": 0.32833171990964827, "grad_norm": 1.531362101285619, "learning_rate": 8.027555207021628e-07, "loss": 0.475, "step": 2035 }, { "epoch": 0.3284930622781542, "grad_norm": 1.3493477199212187, "learning_rate": 8.025431648308758e-07, "loss": 0.377, "step": 2036 }, { "epoch": 0.32865440464666024, "grad_norm": 1.7137987432914414, "learning_rate": 8.023307228301777e-07, "loss": 0.4547, "step": 2037 }, { "epoch": 0.3288157470151662, "grad_norm": 1.8330478734966966, "learning_rate": 8.021181947605472e-07, "loss": 0.5209, "step": 2038 }, { "epoch": 0.32897708938367215, "grad_norm": 2.351603865655693, "learning_rate": 8.019055806824881e-07, "loss": 0.4698, "step": 2039 }, { "epoch": 0.3291384317521781, "grad_norm": 1.2238873508513162, "learning_rate": 8.016928806565286e-07, "loss": 0.5295, "step": 2040 }, { "epoch": 0.3292997741206841, "grad_norm": 1.7383227396953418, "learning_rate": 8.014800947432206e-07, "loss": 0.549, "step": 2041 }, { "epoch": 0.3294611164891901, "grad_norm": 1.9114581393069412, "learning_rate": 8.012672230031412e-07, "loss": 0.554, "step": 2042 }, { "epoch": 0.32962245885769603, "grad_norm": 1.4321759356357073, "learning_rate": 8.010542654968919e-07, "loss": 0.4115, "step": 2043 }, { "epoch": 0.329783801226202, "grad_norm": 1.348867032804301, "learning_rate": 8.008412222850982e-07, "loss": 0.4483, "step": 2044 }, { "epoch": 0.32994514359470795, "grad_norm": 1.4790149485546438, "learning_rate": 8.006280934284104e-07, "loss": 0.5452, "step": 2045 }, { "epoch": 0.33010648596321396, "grad_norm": 1.457976006827192, "learning_rate": 8.004148789875031e-07, "loss": 0.5018, "step": 2046 }, { "epoch": 0.3302678283317199, "grad_norm": 1.8492822569307668, "learning_rate": 8.00201579023075e-07, "loss": 0.6272, "step": 2047 }, { "epoch": 0.33042917070022587, "grad_norm": 1.774703393251375, "learning_rate": 7.999881935958493e-07, "loss": 0.5783, "step": 2048 }, { "epoch": 0.33059051306873183, "grad_norm": 2.1395328826552618, "learning_rate": 7.997747227665738e-07, "loss": 0.4511, "step": 2049 }, { "epoch": 0.33075185543723784, "grad_norm": 1.4639966626259542, "learning_rate": 7.9956116659602e-07, "loss": 0.518, "step": 2050 }, { "epoch": 0.3309131978057438, "grad_norm": 1.5772030847488796, "learning_rate": 7.993475251449846e-07, "loss": 0.5583, "step": 2051 }, { "epoch": 0.33107454017424975, "grad_norm": 1.9355805641421993, "learning_rate": 7.991337984742877e-07, "loss": 0.5946, "step": 2052 }, { "epoch": 0.3312358825427557, "grad_norm": 1.1795404102506908, "learning_rate": 7.989199866447742e-07, "loss": 0.5203, "step": 2053 }, { "epoch": 0.3313972249112617, "grad_norm": 1.8448850726176962, "learning_rate": 7.987060897173127e-07, "loss": 0.5815, "step": 2054 }, { "epoch": 0.3315585672797677, "grad_norm": 1.7615729903930117, "learning_rate": 7.98492107752797e-07, "loss": 0.4794, "step": 2055 }, { "epoch": 0.33171990964827364, "grad_norm": 1.987915664967903, "learning_rate": 7.982780408121439e-07, "loss": 0.5447, "step": 2056 }, { "epoch": 0.3318812520167796, "grad_norm": 2.0819944911667703, "learning_rate": 7.980638889562951e-07, "loss": 0.6782, "step": 2057 }, { "epoch": 0.33204259438528555, "grad_norm": 1.7514743044289023, "learning_rate": 7.978496522462167e-07, "loss": 0.6694, "step": 2058 }, { "epoch": 0.33220393675379156, "grad_norm": 1.3136013950176708, "learning_rate": 7.976353307428982e-07, "loss": 0.6262, "step": 2059 }, { "epoch": 0.3323652791222975, "grad_norm": 1.8917286071495325, "learning_rate": 7.97420924507354e-07, "loss": 0.6093, "step": 2060 }, { "epoch": 0.3325266214908035, "grad_norm": 2.0837115008133504, "learning_rate": 7.972064336006219e-07, "loss": 0.6364, "step": 2061 }, { "epoch": 0.33268796385930943, "grad_norm": 1.8907323129006472, "learning_rate": 7.969918580837646e-07, "loss": 0.5405, "step": 2062 }, { "epoch": 0.33284930622781544, "grad_norm": 2.3237121542833905, "learning_rate": 7.967771980178682e-07, "loss": 0.6372, "step": 2063 }, { "epoch": 0.3330106485963214, "grad_norm": 2.3464412242617105, "learning_rate": 7.965624534640432e-07, "loss": 0.8757, "step": 2064 }, { "epoch": 0.33317199096482736, "grad_norm": 1.4525145202016, "learning_rate": 7.963476244834241e-07, "loss": 0.5378, "step": 2065 }, { "epoch": 0.3333333333333333, "grad_norm": 1.4112289871124626, "learning_rate": 7.961327111371694e-07, "loss": 0.4824, "step": 2066 }, { "epoch": 0.3334946757018393, "grad_norm": 1.844121798512347, "learning_rate": 7.959177134864617e-07, "loss": 0.5074, "step": 2067 }, { "epoch": 0.3336560180703453, "grad_norm": 1.3497000855629475, "learning_rate": 7.957026315925072e-07, "loss": 0.493, "step": 2068 }, { "epoch": 0.33381736043885124, "grad_norm": 1.166520768080046, "learning_rate": 7.954874655165371e-07, "loss": 0.434, "step": 2069 }, { "epoch": 0.3339787028073572, "grad_norm": 1.7479828380597704, "learning_rate": 7.952722153198054e-07, "loss": 0.5328, "step": 2070 }, { "epoch": 0.33414004517586315, "grad_norm": 2.05766805432943, "learning_rate": 7.950568810635907e-07, "loss": 0.6482, "step": 2071 }, { "epoch": 0.33430138754436917, "grad_norm": 1.2623289540062679, "learning_rate": 7.948414628091952e-07, "loss": 0.3163, "step": 2072 }, { "epoch": 0.3344627299128751, "grad_norm": 2.065077356178095, "learning_rate": 7.946259606179453e-07, "loss": 0.7973, "step": 2073 }, { "epoch": 0.3346240722813811, "grad_norm": 1.311727069149026, "learning_rate": 7.944103745511912e-07, "loss": 0.4498, "step": 2074 }, { "epoch": 0.33478541464988704, "grad_norm": 1.9588877995457286, "learning_rate": 7.94194704670307e-07, "loss": 0.4336, "step": 2075 }, { "epoch": 0.33494675701839305, "grad_norm": 2.0664890900071677, "learning_rate": 7.939789510366905e-07, "loss": 0.6451, "step": 2076 }, { "epoch": 0.335108099386899, "grad_norm": 1.4980983600945381, "learning_rate": 7.937631137117635e-07, "loss": 0.3049, "step": 2077 }, { "epoch": 0.33526944175540496, "grad_norm": 1.5658299629834282, "learning_rate": 7.935471927569717e-07, "loss": 0.7391, "step": 2078 }, { "epoch": 0.3354307841239109, "grad_norm": 1.7869189218231352, "learning_rate": 7.933311882337842e-07, "loss": 0.5344, "step": 2079 }, { "epoch": 0.33559212649241693, "grad_norm": 1.9908665485041022, "learning_rate": 7.931151002036944e-07, "loss": 0.7145, "step": 2080 }, { "epoch": 0.3357534688609229, "grad_norm": 1.8196954295218017, "learning_rate": 7.928989287282194e-07, "loss": 0.5447, "step": 2081 }, { "epoch": 0.33591481122942884, "grad_norm": 2.111280244889564, "learning_rate": 7.926826738688996e-07, "loss": 0.626, "step": 2082 }, { "epoch": 0.3360761535979348, "grad_norm": 1.4225200824627278, "learning_rate": 7.924663356872997e-07, "loss": 0.5766, "step": 2083 }, { "epoch": 0.3362374959664408, "grad_norm": 1.6739173607202447, "learning_rate": 7.922499142450077e-07, "loss": 0.7601, "step": 2084 }, { "epoch": 0.33639883833494677, "grad_norm": 1.87191970848225, "learning_rate": 7.920334096036354e-07, "loss": 0.6196, "step": 2085 }, { "epoch": 0.3365601807034527, "grad_norm": 2.3023471134550846, "learning_rate": 7.918168218248187e-07, "loss": 0.6221, "step": 2086 }, { "epoch": 0.3367215230719587, "grad_norm": 1.4900090158432973, "learning_rate": 7.916001509702166e-07, "loss": 0.5199, "step": 2087 }, { "epoch": 0.33688286544046464, "grad_norm": 1.3485579418086422, "learning_rate": 7.91383397101512e-07, "loss": 0.5971, "step": 2088 }, { "epoch": 0.33704420780897065, "grad_norm": 1.892418885199719, "learning_rate": 7.911665602804113e-07, "loss": 0.6905, "step": 2089 }, { "epoch": 0.3372055501774766, "grad_norm": 1.4224389603161167, "learning_rate": 7.909496405686448e-07, "loss": 0.3895, "step": 2090 }, { "epoch": 0.33736689254598257, "grad_norm": 1.44755561590001, "learning_rate": 7.907326380279662e-07, "loss": 0.479, "step": 2091 }, { "epoch": 0.3375282349144885, "grad_norm": 1.647138327695262, "learning_rate": 7.905155527201527e-07, "loss": 0.5308, "step": 2092 }, { "epoch": 0.33768957728299454, "grad_norm": 1.4064255075076069, "learning_rate": 7.902983847070053e-07, "loss": 0.5202, "step": 2093 }, { "epoch": 0.3378509196515005, "grad_norm": 1.6444879014198492, "learning_rate": 7.900811340503483e-07, "loss": 0.4907, "step": 2094 }, { "epoch": 0.33801226202000645, "grad_norm": 1.7585242292846235, "learning_rate": 7.898638008120298e-07, "loss": 0.5743, "step": 2095 }, { "epoch": 0.3381736043885124, "grad_norm": 1.6889033663419184, "learning_rate": 7.896463850539209e-07, "loss": 0.5045, "step": 2096 }, { "epoch": 0.3383349467570184, "grad_norm": 1.634304770133251, "learning_rate": 7.894288868379168e-07, "loss": 0.6713, "step": 2097 }, { "epoch": 0.3384962891255244, "grad_norm": 1.9681219885552614, "learning_rate": 7.89211306225936e-07, "loss": 0.4907, "step": 2098 }, { "epoch": 0.33865763149403033, "grad_norm": 1.4358523504437286, "learning_rate": 7.889936432799203e-07, "loss": 0.5497, "step": 2099 }, { "epoch": 0.3388189738625363, "grad_norm": 1.6192305186756064, "learning_rate": 7.887758980618349e-07, "loss": 0.6483, "step": 2100 }, { "epoch": 0.33898031623104224, "grad_norm": 1.7951707138234538, "learning_rate": 7.885580706336686e-07, "loss": 0.7285, "step": 2101 }, { "epoch": 0.33914165859954826, "grad_norm": 1.777735807334006, "learning_rate": 7.883401610574336e-07, "loss": 0.555, "step": 2102 }, { "epoch": 0.3393030009680542, "grad_norm": 1.4945344692580413, "learning_rate": 7.881221693951652e-07, "loss": 0.4699, "step": 2103 }, { "epoch": 0.33946434333656017, "grad_norm": 1.3666440046959154, "learning_rate": 7.879040957089227e-07, "loss": 0.3599, "step": 2104 }, { "epoch": 0.3396256857050661, "grad_norm": 1.471335387883713, "learning_rate": 7.876859400607879e-07, "loss": 0.5003, "step": 2105 }, { "epoch": 0.33978702807357214, "grad_norm": 1.9833665222575323, "learning_rate": 7.874677025128666e-07, "loss": 0.4676, "step": 2106 }, { "epoch": 0.3399483704420781, "grad_norm": 1.8384445928462396, "learning_rate": 7.872493831272877e-07, "loss": 0.6302, "step": 2107 }, { "epoch": 0.34010971281058405, "grad_norm": 1.5416717099631359, "learning_rate": 7.870309819662034e-07, "loss": 0.5268, "step": 2108 }, { "epoch": 0.34027105517909, "grad_norm": 1.5602046128113745, "learning_rate": 7.86812499091789e-07, "loss": 0.3956, "step": 2109 }, { "epoch": 0.340432397547596, "grad_norm": 1.8443404076750405, "learning_rate": 7.865939345662436e-07, "loss": 0.5951, "step": 2110 }, { "epoch": 0.340593739916102, "grad_norm": 1.3128926734261392, "learning_rate": 7.863752884517887e-07, "loss": 0.462, "step": 2111 }, { "epoch": 0.34075508228460794, "grad_norm": 1.5400254388665005, "learning_rate": 7.861565608106699e-07, "loss": 0.3754, "step": 2112 }, { "epoch": 0.3409164246531139, "grad_norm": 3.6691372668627507, "learning_rate": 7.859377517051554e-07, "loss": 0.5732, "step": 2113 }, { "epoch": 0.3410777670216199, "grad_norm": 1.4238608609008974, "learning_rate": 7.857188611975366e-07, "loss": 0.4516, "step": 2114 }, { "epoch": 0.34123910939012586, "grad_norm": 1.8475821159603907, "learning_rate": 7.854998893501288e-07, "loss": 0.478, "step": 2115 }, { "epoch": 0.3414004517586318, "grad_norm": 1.5398361662625195, "learning_rate": 7.852808362252696e-07, "loss": 0.4541, "step": 2116 }, { "epoch": 0.3415617941271378, "grad_norm": 1.9850025536354992, "learning_rate": 7.850617018853203e-07, "loss": 0.5629, "step": 2117 }, { "epoch": 0.34172313649564373, "grad_norm": 2.1220139111090743, "learning_rate": 7.848424863926648e-07, "loss": 0.6521, "step": 2118 }, { "epoch": 0.34188447886414974, "grad_norm": 1.5510630315569218, "learning_rate": 7.846231898097105e-07, "loss": 0.3951, "step": 2119 }, { "epoch": 0.3420458212326557, "grad_norm": 1.4505073186038195, "learning_rate": 7.844038121988878e-07, "loss": 0.6031, "step": 2120 }, { "epoch": 0.34220716360116166, "grad_norm": 2.1285633055915074, "learning_rate": 7.841843536226503e-07, "loss": 0.6841, "step": 2121 }, { "epoch": 0.3423685059696676, "grad_norm": 2.381102101296136, "learning_rate": 7.839648141434742e-07, "loss": 0.5861, "step": 2122 }, { "epoch": 0.3425298483381736, "grad_norm": 1.4713708747207694, "learning_rate": 7.837451938238594e-07, "loss": 0.544, "step": 2123 }, { "epoch": 0.3426911907066796, "grad_norm": 1.2962580672687165, "learning_rate": 7.835254927263281e-07, "loss": 0.4706, "step": 2124 }, { "epoch": 0.34285253307518554, "grad_norm": 1.2566554748397682, "learning_rate": 7.83305710913426e-07, "loss": 0.4796, "step": 2125 }, { "epoch": 0.3430138754436915, "grad_norm": 1.924837016352362, "learning_rate": 7.830858484477217e-07, "loss": 0.7389, "step": 2126 }, { "epoch": 0.3431752178121975, "grad_norm": 1.819816292514433, "learning_rate": 7.828659053918066e-07, "loss": 0.7591, "step": 2127 }, { "epoch": 0.34333656018070347, "grad_norm": 1.5548000582487675, "learning_rate": 7.826458818082951e-07, "loss": 0.6193, "step": 2128 }, { "epoch": 0.3434979025492094, "grad_norm": 1.50322995523284, "learning_rate": 7.824257777598245e-07, "loss": 0.6361, "step": 2129 }, { "epoch": 0.3436592449177154, "grad_norm": 1.9488679191730351, "learning_rate": 7.82205593309055e-07, "loss": 0.7985, "step": 2130 }, { "epoch": 0.34382058728622134, "grad_norm": 1.2089992639367424, "learning_rate": 7.819853285186702e-07, "loss": 0.4915, "step": 2131 }, { "epoch": 0.34398192965472735, "grad_norm": 1.8769180659772022, "learning_rate": 7.817649834513754e-07, "loss": 0.6646, "step": 2132 }, { "epoch": 0.3441432720232333, "grad_norm": 1.9183584155385378, "learning_rate": 7.815445581699001e-07, "loss": 0.6994, "step": 2133 }, { "epoch": 0.34430461439173926, "grad_norm": 1.7168327735732334, "learning_rate": 7.813240527369958e-07, "loss": 0.4335, "step": 2134 }, { "epoch": 0.3444659567602452, "grad_norm": 1.5010931482251317, "learning_rate": 7.811034672154368e-07, "loss": 0.2913, "step": 2135 }, { "epoch": 0.34462729912875123, "grad_norm": 1.639350759579337, "learning_rate": 7.808828016680207e-07, "loss": 0.6054, "step": 2136 }, { "epoch": 0.3447886414972572, "grad_norm": 1.5271431481415787, "learning_rate": 7.806620561575674e-07, "loss": 0.3721, "step": 2137 }, { "epoch": 0.34494998386576314, "grad_norm": 1.7260433469436869, "learning_rate": 7.804412307469199e-07, "loss": 0.6298, "step": 2138 }, { "epoch": 0.3451113262342691, "grad_norm": 1.837108417229278, "learning_rate": 7.802203254989439e-07, "loss": 0.7676, "step": 2139 }, { "epoch": 0.3452726686027751, "grad_norm": 1.808817377951117, "learning_rate": 7.799993404765275e-07, "loss": 0.4997, "step": 2140 }, { "epoch": 0.34543401097128107, "grad_norm": 1.9452401879287067, "learning_rate": 7.797782757425821e-07, "loss": 0.6652, "step": 2141 }, { "epoch": 0.345595353339787, "grad_norm": 1.7638349658336485, "learning_rate": 7.79557131360041e-07, "loss": 0.4176, "step": 2142 }, { "epoch": 0.345756695708293, "grad_norm": 1.5927838407605917, "learning_rate": 7.793359073918611e-07, "loss": 0.465, "step": 2143 }, { "epoch": 0.345918038076799, "grad_norm": 1.4389743707533704, "learning_rate": 7.791146039010211e-07, "loss": 0.4279, "step": 2144 }, { "epoch": 0.34607938044530495, "grad_norm": 1.3726774019751284, "learning_rate": 7.78893220950523e-07, "loss": 0.4571, "step": 2145 }, { "epoch": 0.3462407228138109, "grad_norm": 1.642765356544445, "learning_rate": 7.786717586033908e-07, "loss": 0.4918, "step": 2146 }, { "epoch": 0.34640206518231687, "grad_norm": 1.5745921863402255, "learning_rate": 7.784502169226719e-07, "loss": 0.4951, "step": 2147 }, { "epoch": 0.3465634075508228, "grad_norm": 1.3170104408950967, "learning_rate": 7.782285959714353e-07, "loss": 0.5279, "step": 2148 }, { "epoch": 0.34672474991932883, "grad_norm": 1.563678297172126, "learning_rate": 7.780068958127736e-07, "loss": 0.4812, "step": 2149 }, { "epoch": 0.3468860922878348, "grad_norm": 1.5992455819864866, "learning_rate": 7.777851165098011e-07, "loss": 0.5081, "step": 2150 }, { "epoch": 0.34704743465634075, "grad_norm": 1.3753668989168117, "learning_rate": 7.77563258125655e-07, "loss": 0.5, "step": 2151 }, { "epoch": 0.3472087770248467, "grad_norm": 1.1823174169931912, "learning_rate": 7.773413207234953e-07, "loss": 0.6432, "step": 2152 }, { "epoch": 0.3473701193933527, "grad_norm": 1.868209112474658, "learning_rate": 7.771193043665038e-07, "loss": 0.4955, "step": 2153 }, { "epoch": 0.3475314617618587, "grad_norm": 1.4585771720119842, "learning_rate": 7.768972091178853e-07, "loss": 0.4824, "step": 2154 }, { "epoch": 0.34769280413036463, "grad_norm": 1.4857872117793764, "learning_rate": 7.766750350408671e-07, "loss": 0.5835, "step": 2155 }, { "epoch": 0.3478541464988706, "grad_norm": 1.0389078928716013, "learning_rate": 7.764527821986986e-07, "loss": 0.4091, "step": 2156 }, { "epoch": 0.3480154888673766, "grad_norm": 1.7986601876428945, "learning_rate": 7.762304506546518e-07, "loss": 0.6659, "step": 2157 }, { "epoch": 0.34817683123588256, "grad_norm": 2.1672485254960145, "learning_rate": 7.760080404720209e-07, "loss": 0.6188, "step": 2158 }, { "epoch": 0.3483381736043885, "grad_norm": 1.7109105356260268, "learning_rate": 7.757855517141228e-07, "loss": 0.569, "step": 2159 }, { "epoch": 0.34849951597289447, "grad_norm": 1.7620813373017057, "learning_rate": 7.755629844442971e-07, "loss": 0.5191, "step": 2160 }, { "epoch": 0.3486608583414004, "grad_norm": 2.1117784159335775, "learning_rate": 7.753403387259045e-07, "loss": 0.5243, "step": 2161 }, { "epoch": 0.34882220070990644, "grad_norm": 1.745622473147073, "learning_rate": 7.751176146223293e-07, "loss": 0.7291, "step": 2162 }, { "epoch": 0.3489835430784124, "grad_norm": 1.753981556794343, "learning_rate": 7.748948121969777e-07, "loss": 0.6086, "step": 2163 }, { "epoch": 0.34914488544691835, "grad_norm": 1.8424063490277447, "learning_rate": 7.746719315132779e-07, "loss": 0.6642, "step": 2164 }, { "epoch": 0.3493062278154243, "grad_norm": 1.872180026107436, "learning_rate": 7.744489726346808e-07, "loss": 0.494, "step": 2165 }, { "epoch": 0.3494675701839303, "grad_norm": 1.31098678187058, "learning_rate": 7.742259356246594e-07, "loss": 0.418, "step": 2166 }, { "epoch": 0.3496289125524363, "grad_norm": 1.546200691985041, "learning_rate": 7.740028205467086e-07, "loss": 0.4608, "step": 2167 }, { "epoch": 0.34979025492094223, "grad_norm": 1.6899353397572419, "learning_rate": 7.737796274643464e-07, "loss": 0.5949, "step": 2168 }, { "epoch": 0.3499515972894482, "grad_norm": 1.5412642323129973, "learning_rate": 7.73556356441112e-07, "loss": 0.4792, "step": 2169 }, { "epoch": 0.3501129396579542, "grad_norm": 1.6094567731804195, "learning_rate": 7.733330075405674e-07, "loss": 0.5601, "step": 2170 }, { "epoch": 0.35027428202646016, "grad_norm": 1.492553346533562, "learning_rate": 7.731095808262965e-07, "loss": 0.5999, "step": 2171 }, { "epoch": 0.3504356243949661, "grad_norm": 1.8130155027582022, "learning_rate": 7.728860763619058e-07, "loss": 0.6258, "step": 2172 }, { "epoch": 0.3505969667634721, "grad_norm": 1.6873192160513313, "learning_rate": 7.726624942110232e-07, "loss": 0.7149, "step": 2173 }, { "epoch": 0.35075830913197803, "grad_norm": 1.7678213047066595, "learning_rate": 7.724388344372995e-07, "loss": 0.4365, "step": 2174 }, { "epoch": 0.35091965150048404, "grad_norm": 1.71683839784648, "learning_rate": 7.722150971044068e-07, "loss": 0.6248, "step": 2175 }, { "epoch": 0.35108099386899, "grad_norm": 1.7070016400106427, "learning_rate": 7.719912822760399e-07, "loss": 0.5649, "step": 2176 }, { "epoch": 0.35124233623749596, "grad_norm": 1.6726485984944388, "learning_rate": 7.717673900159156e-07, "loss": 0.3862, "step": 2177 }, { "epoch": 0.3514036786060019, "grad_norm": 2.079387310095977, "learning_rate": 7.715434203877724e-07, "loss": 0.6343, "step": 2178 }, { "epoch": 0.3515650209745079, "grad_norm": 1.0891965633064937, "learning_rate": 7.713193734553712e-07, "loss": 0.4677, "step": 2179 }, { "epoch": 0.3517263633430139, "grad_norm": 1.42047494561545, "learning_rate": 7.710952492824945e-07, "loss": 0.4553, "step": 2180 }, { "epoch": 0.35188770571151984, "grad_norm": 2.0092635199440925, "learning_rate": 7.708710479329472e-07, "loss": 0.4896, "step": 2181 }, { "epoch": 0.3520490480800258, "grad_norm": 2.0027867214563426, "learning_rate": 7.70646769470556e-07, "loss": 0.6705, "step": 2182 }, { "epoch": 0.3522103904485318, "grad_norm": 1.4104909314767022, "learning_rate": 7.704224139591695e-07, "loss": 0.3961, "step": 2183 }, { "epoch": 0.35237173281703776, "grad_norm": 1.5407492597195347, "learning_rate": 7.701979814626582e-07, "loss": 0.4271, "step": 2184 }, { "epoch": 0.3525330751855437, "grad_norm": 1.6536495739351353, "learning_rate": 7.699734720449148e-07, "loss": 0.5983, "step": 2185 }, { "epoch": 0.3526944175540497, "grad_norm": 1.644286438758447, "learning_rate": 7.697488857698535e-07, "loss": 0.3967, "step": 2186 }, { "epoch": 0.3528557599225557, "grad_norm": 1.2701997376115084, "learning_rate": 7.695242227014107e-07, "loss": 0.3852, "step": 2187 }, { "epoch": 0.35301710229106165, "grad_norm": 1.752119279720257, "learning_rate": 7.692994829035445e-07, "loss": 0.5148, "step": 2188 }, { "epoch": 0.3531784446595676, "grad_norm": 1.4849406720521767, "learning_rate": 7.690746664402348e-07, "loss": 0.456, "step": 2189 }, { "epoch": 0.35333978702807356, "grad_norm": 1.664380969883078, "learning_rate": 7.688497733754835e-07, "loss": 0.5579, "step": 2190 }, { "epoch": 0.3535011293965795, "grad_norm": 1.886366785679807, "learning_rate": 7.686248037733143e-07, "loss": 0.6158, "step": 2191 }, { "epoch": 0.35366247176508553, "grad_norm": 1.5891057581532146, "learning_rate": 7.683997576977723e-07, "loss": 0.6148, "step": 2192 }, { "epoch": 0.3538238141335915, "grad_norm": 1.7916763734184922, "learning_rate": 7.68174635212925e-07, "loss": 0.5672, "step": 2193 }, { "epoch": 0.35398515650209744, "grad_norm": 1.5560621002622088, "learning_rate": 7.679494363828613e-07, "loss": 0.668, "step": 2194 }, { "epoch": 0.3541464988706034, "grad_norm": 1.7162255214159639, "learning_rate": 7.677241612716916e-07, "loss": 0.7017, "step": 2195 }, { "epoch": 0.3543078412391094, "grad_norm": 1.5858235435698171, "learning_rate": 7.674988099435486e-07, "loss": 0.5483, "step": 2196 }, { "epoch": 0.35446918360761537, "grad_norm": 1.615165810473233, "learning_rate": 7.672733824625861e-07, "loss": 0.6383, "step": 2197 }, { "epoch": 0.3546305259761213, "grad_norm": 1.8986397466737959, "learning_rate": 7.670478788929802e-07, "loss": 0.6069, "step": 2198 }, { "epoch": 0.3547918683446273, "grad_norm": 1.8280735497692888, "learning_rate": 7.668222992989277e-07, "loss": 0.5197, "step": 2199 }, { "epoch": 0.3549532107131333, "grad_norm": 1.70680615929171, "learning_rate": 7.665966437446484e-07, "loss": 0.6054, "step": 2200 }, { "epoch": 0.35511455308163925, "grad_norm": 1.2894431736980156, "learning_rate": 7.663709122943825e-07, "loss": 0.4367, "step": 2201 }, { "epoch": 0.3552758954501452, "grad_norm": 1.9252518551040705, "learning_rate": 7.661451050123924e-07, "loss": 0.473, "step": 2202 }, { "epoch": 0.35543723781865116, "grad_norm": 1.7041878140108189, "learning_rate": 7.659192219629622e-07, "loss": 0.5265, "step": 2203 }, { "epoch": 0.3555985801871571, "grad_norm": 1.6704359189176354, "learning_rate": 7.65693263210397e-07, "loss": 0.4833, "step": 2204 }, { "epoch": 0.35575992255566313, "grad_norm": 2.03564224737144, "learning_rate": 7.654672288190239e-07, "loss": 0.7695, "step": 2205 }, { "epoch": 0.3559212649241691, "grad_norm": 1.8247904565781523, "learning_rate": 7.652411188531916e-07, "loss": 0.6084, "step": 2206 }, { "epoch": 0.35608260729267505, "grad_norm": 1.8476367651010774, "learning_rate": 7.6501493337727e-07, "loss": 0.5988, "step": 2207 }, { "epoch": 0.356243949661181, "grad_norm": 1.5403642135795952, "learning_rate": 7.647886724556507e-07, "loss": 0.5442, "step": 2208 }, { "epoch": 0.356405292029687, "grad_norm": 2.0630411536308984, "learning_rate": 7.64562336152747e-07, "loss": 0.676, "step": 2209 }, { "epoch": 0.35656663439819297, "grad_norm": 1.8766128596807117, "learning_rate": 7.643359245329925e-07, "loss": 0.6797, "step": 2210 }, { "epoch": 0.35672797676669893, "grad_norm": 1.636001611890675, "learning_rate": 7.641094376608441e-07, "loss": 0.5868, "step": 2211 }, { "epoch": 0.3568893191352049, "grad_norm": 1.382525829191172, "learning_rate": 7.638828756007787e-07, "loss": 0.4425, "step": 2212 }, { "epoch": 0.3570506615037109, "grad_norm": 1.4027441191753114, "learning_rate": 7.636562384172951e-07, "loss": 0.4438, "step": 2213 }, { "epoch": 0.35721200387221685, "grad_norm": 1.879988455838411, "learning_rate": 7.634295261749135e-07, "loss": 0.6722, "step": 2214 }, { "epoch": 0.3573733462407228, "grad_norm": 1.8332678031769618, "learning_rate": 7.632027389381754e-07, "loss": 0.4791, "step": 2215 }, { "epoch": 0.35753468860922877, "grad_norm": 1.7729767188164338, "learning_rate": 7.629758767716436e-07, "loss": 0.6042, "step": 2216 }, { "epoch": 0.3576960309777348, "grad_norm": 1.5282370060162844, "learning_rate": 7.627489397399023e-07, "loss": 0.4612, "step": 2217 }, { "epoch": 0.35785737334624074, "grad_norm": 1.245404857240452, "learning_rate": 7.62521927907557e-07, "loss": 0.5032, "step": 2218 }, { "epoch": 0.3580187157147467, "grad_norm": 1.686706639043627, "learning_rate": 7.622948413392344e-07, "loss": 0.7334, "step": 2219 }, { "epoch": 0.35818005808325265, "grad_norm": 1.226202553580703, "learning_rate": 7.620676800995829e-07, "loss": 0.5745, "step": 2220 }, { "epoch": 0.3583414004517586, "grad_norm": 2.247869754710683, "learning_rate": 7.618404442532716e-07, "loss": 0.5654, "step": 2221 }, { "epoch": 0.3585027428202646, "grad_norm": 1.8494348332871, "learning_rate": 7.616131338649907e-07, "loss": 0.4689, "step": 2222 }, { "epoch": 0.3586640851887706, "grad_norm": 1.6091631407314702, "learning_rate": 7.613857489994528e-07, "loss": 0.5, "step": 2223 }, { "epoch": 0.35882542755727653, "grad_norm": 1.6796904098130683, "learning_rate": 7.611582897213902e-07, "loss": 0.4994, "step": 2224 }, { "epoch": 0.3589867699257825, "grad_norm": 1.6998224698661593, "learning_rate": 7.609307560955576e-07, "loss": 0.4466, "step": 2225 }, { "epoch": 0.3591481122942885, "grad_norm": 1.7771784108149011, "learning_rate": 7.607031481867299e-07, "loss": 0.4475, "step": 2226 }, { "epoch": 0.35930945466279446, "grad_norm": 1.3766208978361665, "learning_rate": 7.604754660597039e-07, "loss": 0.481, "step": 2227 }, { "epoch": 0.3594707970313004, "grad_norm": 1.4172073716155222, "learning_rate": 7.60247709779297e-07, "loss": 0.3097, "step": 2228 }, { "epoch": 0.35963213939980637, "grad_norm": 1.6281601182660999, "learning_rate": 7.600198794103479e-07, "loss": 0.5182, "step": 2229 }, { "epoch": 0.3597934817683124, "grad_norm": 1.7240583807581606, "learning_rate": 7.597919750177168e-07, "loss": 0.5713, "step": 2230 }, { "epoch": 0.35995482413681834, "grad_norm": 1.671012584682265, "learning_rate": 7.595639966662842e-07, "loss": 0.5098, "step": 2231 }, { "epoch": 0.3601161665053243, "grad_norm": 1.728859594571433, "learning_rate": 7.593359444209522e-07, "loss": 0.629, "step": 2232 }, { "epoch": 0.36027750887383025, "grad_norm": 1.7904486471149996, "learning_rate": 7.591078183466438e-07, "loss": 0.4531, "step": 2233 }, { "epoch": 0.3604388512423362, "grad_norm": 1.6304937809483677, "learning_rate": 7.588796185083028e-07, "loss": 0.5855, "step": 2234 }, { "epoch": 0.3606001936108422, "grad_norm": 1.38704812879607, "learning_rate": 7.586513449708947e-07, "loss": 0.5062, "step": 2235 }, { "epoch": 0.3607615359793482, "grad_norm": 1.5605595555942096, "learning_rate": 7.58422997799405e-07, "loss": 0.4839, "step": 2236 }, { "epoch": 0.36092287834785414, "grad_norm": 1.5296184645009157, "learning_rate": 7.581945770588409e-07, "loss": 0.4424, "step": 2237 }, { "epoch": 0.3610842207163601, "grad_norm": 1.9947588314478113, "learning_rate": 7.5796608281423e-07, "loss": 0.5738, "step": 2238 }, { "epoch": 0.3612455630848661, "grad_norm": 2.189098101373377, "learning_rate": 7.577375151306216e-07, "loss": 0.6245, "step": 2239 }, { "epoch": 0.36140690545337206, "grad_norm": 3.002944296204303, "learning_rate": 7.575088740730848e-07, "loss": 0.5443, "step": 2240 }, { "epoch": 0.361568247821878, "grad_norm": 1.967650394047787, "learning_rate": 7.572801597067109e-07, "loss": 0.6084, "step": 2241 }, { "epoch": 0.361729590190384, "grad_norm": 1.4688809113294687, "learning_rate": 7.570513720966107e-07, "loss": 0.5299, "step": 2242 }, { "epoch": 0.36189093255889, "grad_norm": 2.0688870110909017, "learning_rate": 7.568225113079171e-07, "loss": 0.621, "step": 2243 }, { "epoch": 0.36205227492739595, "grad_norm": 1.6166925087400392, "learning_rate": 7.565935774057831e-07, "loss": 0.4901, "step": 2244 }, { "epoch": 0.3622136172959019, "grad_norm": 1.4017413447320186, "learning_rate": 7.563645704553825e-07, "loss": 0.5146, "step": 2245 }, { "epoch": 0.36237495966440786, "grad_norm": 2.037795685451746, "learning_rate": 7.561354905219102e-07, "loss": 0.6513, "step": 2246 }, { "epoch": 0.36253630203291387, "grad_norm": 1.807361619751673, "learning_rate": 7.559063376705816e-07, "loss": 0.3977, "step": 2247 }, { "epoch": 0.3626976444014198, "grad_norm": 1.5014060106613727, "learning_rate": 7.556771119666334e-07, "loss": 0.4487, "step": 2248 }, { "epoch": 0.3628589867699258, "grad_norm": 1.7170947340534568, "learning_rate": 7.554478134753224e-07, "loss": 0.453, "step": 2249 }, { "epoch": 0.36302032913843174, "grad_norm": 1.863092312940364, "learning_rate": 7.552184422619261e-07, "loss": 0.4914, "step": 2250 }, { "epoch": 0.3631816715069377, "grad_norm": 1.7730225064892595, "learning_rate": 7.549889983917435e-07, "loss": 0.5188, "step": 2251 }, { "epoch": 0.3633430138754437, "grad_norm": 1.4792009627257083, "learning_rate": 7.547594819300935e-07, "loss": 0.6132, "step": 2252 }, { "epoch": 0.36350435624394967, "grad_norm": 2.0156012245372845, "learning_rate": 7.545298929423158e-07, "loss": 0.4551, "step": 2253 }, { "epoch": 0.3636656986124556, "grad_norm": 1.9800586764956545, "learning_rate": 7.543002314937712e-07, "loss": 0.5504, "step": 2254 }, { "epoch": 0.3638270409809616, "grad_norm": 1.7701908591651083, "learning_rate": 7.540704976498404e-07, "loss": 0.5991, "step": 2255 }, { "epoch": 0.3639883833494676, "grad_norm": 1.6551664874879206, "learning_rate": 7.538406914759255e-07, "loss": 0.5438, "step": 2256 }, { "epoch": 0.36414972571797355, "grad_norm": 1.8532047634102236, "learning_rate": 7.536108130374485e-07, "loss": 0.5732, "step": 2257 }, { "epoch": 0.3643110680864795, "grad_norm": 2.1445295823721318, "learning_rate": 7.533808623998526e-07, "loss": 0.65, "step": 2258 }, { "epoch": 0.36447241045498546, "grad_norm": 1.3932107181456597, "learning_rate": 7.531508396286008e-07, "loss": 0.503, "step": 2259 }, { "epoch": 0.3646337528234915, "grad_norm": 2.4876529972430608, "learning_rate": 7.529207447891774e-07, "loss": 0.7081, "step": 2260 }, { "epoch": 0.36479509519199743, "grad_norm": 1.7920299723536894, "learning_rate": 7.526905779470868e-07, "loss": 0.5306, "step": 2261 }, { "epoch": 0.3649564375605034, "grad_norm": 1.4198273387223699, "learning_rate": 7.52460339167854e-07, "loss": 0.439, "step": 2262 }, { "epoch": 0.36511777992900935, "grad_norm": 1.4081607552978195, "learning_rate": 7.522300285170242e-07, "loss": 0.4176, "step": 2263 }, { "epoch": 0.3652791222975153, "grad_norm": 2.01151643494964, "learning_rate": 7.519996460601637e-07, "loss": 0.6618, "step": 2264 }, { "epoch": 0.3654404646660213, "grad_norm": 1.7041449336350427, "learning_rate": 7.517691918628588e-07, "loss": 0.6558, "step": 2265 }, { "epoch": 0.36560180703452727, "grad_norm": 1.97490004033021, "learning_rate": 7.515386659907161e-07, "loss": 0.6648, "step": 2266 }, { "epoch": 0.3657631494030332, "grad_norm": 2.2367274564999295, "learning_rate": 7.513080685093629e-07, "loss": 0.6554, "step": 2267 }, { "epoch": 0.3659244917715392, "grad_norm": 1.501604493632678, "learning_rate": 7.510773994844465e-07, "loss": 0.4282, "step": 2268 }, { "epoch": 0.3660858341400452, "grad_norm": 1.5505484687342228, "learning_rate": 7.508466589816353e-07, "loss": 0.3714, "step": 2269 }, { "epoch": 0.36624717650855115, "grad_norm": 1.3333884118545432, "learning_rate": 7.506158470666174e-07, "loss": 0.5928, "step": 2270 }, { "epoch": 0.3664085188770571, "grad_norm": 1.9024090452170075, "learning_rate": 7.503849638051015e-07, "loss": 0.6094, "step": 2271 }, { "epoch": 0.36656986124556307, "grad_norm": 1.787659207169813, "learning_rate": 7.501540092628162e-07, "loss": 0.4661, "step": 2272 }, { "epoch": 0.3667312036140691, "grad_norm": 1.5488080363756738, "learning_rate": 7.499229835055111e-07, "loss": 0.4962, "step": 2273 }, { "epoch": 0.36689254598257504, "grad_norm": 1.472356224359415, "learning_rate": 7.496918865989554e-07, "loss": 0.417, "step": 2274 }, { "epoch": 0.367053888351081, "grad_norm": 2.099641510473553, "learning_rate": 7.494607186089392e-07, "loss": 0.5788, "step": 2275 }, { "epoch": 0.36721523071958695, "grad_norm": 1.7864696253947017, "learning_rate": 7.492294796012722e-07, "loss": 0.6031, "step": 2276 }, { "epoch": 0.3673765730880929, "grad_norm": 2.2480466628882394, "learning_rate": 7.489981696417848e-07, "loss": 0.749, "step": 2277 }, { "epoch": 0.3675379154565989, "grad_norm": 1.3547642880756479, "learning_rate": 7.487667887963273e-07, "loss": 0.5751, "step": 2278 }, { "epoch": 0.3676992578251049, "grad_norm": 2.3797180093674264, "learning_rate": 7.485353371307703e-07, "loss": 0.6421, "step": 2279 }, { "epoch": 0.36786060019361083, "grad_norm": 1.510890684615586, "learning_rate": 7.483038147110046e-07, "loss": 0.439, "step": 2280 }, { "epoch": 0.3680219425621168, "grad_norm": 1.8470872321397127, "learning_rate": 7.480722216029409e-07, "loss": 0.6238, "step": 2281 }, { "epoch": 0.3681832849306228, "grad_norm": 1.4793428754844022, "learning_rate": 7.478405578725104e-07, "loss": 0.544, "step": 2282 }, { "epoch": 0.36834462729912876, "grad_norm": 1.6296924916499087, "learning_rate": 7.476088235856644e-07, "loss": 0.4452, "step": 2283 }, { "epoch": 0.3685059696676347, "grad_norm": 1.7366134357653582, "learning_rate": 7.473770188083737e-07, "loss": 0.4289, "step": 2284 }, { "epoch": 0.36866731203614067, "grad_norm": 1.425432805372616, "learning_rate": 7.471451436066297e-07, "loss": 0.5231, "step": 2285 }, { "epoch": 0.3688286544046467, "grad_norm": 1.1042899057001738, "learning_rate": 7.469131980464438e-07, "loss": 0.4987, "step": 2286 }, { "epoch": 0.36898999677315264, "grad_norm": 1.9201702217303345, "learning_rate": 7.466811821938475e-07, "loss": 0.612, "step": 2287 }, { "epoch": 0.3691513391416586, "grad_norm": 1.9540284165509463, "learning_rate": 7.46449096114892e-07, "loss": 0.6114, "step": 2288 }, { "epoch": 0.36931268151016455, "grad_norm": 1.7593251141646111, "learning_rate": 7.462169398756486e-07, "loss": 0.6415, "step": 2289 }, { "epoch": 0.36947402387867057, "grad_norm": 1.4148481515345153, "learning_rate": 7.459847135422087e-07, "loss": 0.2774, "step": 2290 }, { "epoch": 0.3696353662471765, "grad_norm": 1.8122838647819697, "learning_rate": 7.457524171806835e-07, "loss": 0.5999, "step": 2291 }, { "epoch": 0.3697967086156825, "grad_norm": 1.703302566651087, "learning_rate": 7.455200508572044e-07, "loss": 0.5175, "step": 2292 }, { "epoch": 0.36995805098418844, "grad_norm": 2.0782542941378215, "learning_rate": 7.452876146379225e-07, "loss": 0.7089, "step": 2293 }, { "epoch": 0.3701193933526944, "grad_norm": 1.466282984369895, "learning_rate": 7.450551085890087e-07, "loss": 0.4306, "step": 2294 }, { "epoch": 0.3702807357212004, "grad_norm": 1.27605314703727, "learning_rate": 7.448225327766539e-07, "loss": 0.545, "step": 2295 }, { "epoch": 0.37044207808970636, "grad_norm": 1.7620755191731043, "learning_rate": 7.44589887267069e-07, "loss": 0.5249, "step": 2296 }, { "epoch": 0.3706034204582123, "grad_norm": 1.5012623720191294, "learning_rate": 7.443571721264846e-07, "loss": 0.4002, "step": 2297 }, { "epoch": 0.3707647628267183, "grad_norm": 1.8371232120058518, "learning_rate": 7.441243874211511e-07, "loss": 0.5976, "step": 2298 }, { "epoch": 0.3709261051952243, "grad_norm": 1.4745738044136978, "learning_rate": 7.438915332173388e-07, "loss": 0.4433, "step": 2299 }, { "epoch": 0.37108744756373024, "grad_norm": 1.3524262419101194, "learning_rate": 7.436586095813377e-07, "loss": 0.6065, "step": 2300 }, { "epoch": 0.3712487899322362, "grad_norm": 4.5010912949482575, "learning_rate": 7.434256165794577e-07, "loss": 0.5988, "step": 2301 }, { "epoch": 0.37141013230074216, "grad_norm": 1.7307033681156867, "learning_rate": 7.431925542780281e-07, "loss": 0.7809, "step": 2302 }, { "epoch": 0.37157147466924817, "grad_norm": 1.7373644522068104, "learning_rate": 7.429594227433983e-07, "loss": 0.5215, "step": 2303 }, { "epoch": 0.3717328170377541, "grad_norm": 1.4875256480081462, "learning_rate": 7.427262220419373e-07, "loss": 0.508, "step": 2304 }, { "epoch": 0.3718941594062601, "grad_norm": 1.5323960044635492, "learning_rate": 7.424929522400337e-07, "loss": 0.4983, "step": 2305 }, { "epoch": 0.37205550177476604, "grad_norm": 1.8079323929155968, "learning_rate": 7.422596134040961e-07, "loss": 0.5213, "step": 2306 }, { "epoch": 0.372216844143272, "grad_norm": 2.0358652355282914, "learning_rate": 7.420262056005522e-07, "loss": 0.6297, "step": 2307 }, { "epoch": 0.372378186511778, "grad_norm": 1.623017201800803, "learning_rate": 7.417927288958497e-07, "loss": 0.5331, "step": 2308 }, { "epoch": 0.37253952888028397, "grad_norm": 2.2730575191538995, "learning_rate": 7.415591833564558e-07, "loss": 0.6141, "step": 2309 }, { "epoch": 0.3727008712487899, "grad_norm": 1.6174569757744548, "learning_rate": 7.413255690488577e-07, "loss": 0.4764, "step": 2310 }, { "epoch": 0.3728622136172959, "grad_norm": 1.859998005589062, "learning_rate": 7.410918860395614e-07, "loss": 0.6836, "step": 2311 }, { "epoch": 0.3730235559858019, "grad_norm": 1.476334650130527, "learning_rate": 7.408581343950929e-07, "loss": 0.4178, "step": 2312 }, { "epoch": 0.37318489835430785, "grad_norm": 1.6354883344298956, "learning_rate": 7.406243141819981e-07, "loss": 0.4909, "step": 2313 }, { "epoch": 0.3733462407228138, "grad_norm": 1.934862495090658, "learning_rate": 7.403904254668415e-07, "loss": 0.526, "step": 2314 }, { "epoch": 0.37350758309131976, "grad_norm": 1.4782938822428413, "learning_rate": 7.401564683162082e-07, "loss": 0.4406, "step": 2315 }, { "epoch": 0.3736689254598258, "grad_norm": 1.8409801302474236, "learning_rate": 7.399224427967017e-07, "loss": 0.4903, "step": 2316 }, { "epoch": 0.37383026782833173, "grad_norm": 1.4209934897599157, "learning_rate": 7.396883489749458e-07, "loss": 0.4462, "step": 2317 }, { "epoch": 0.3739916101968377, "grad_norm": 1.3810894247074286, "learning_rate": 7.394541869175834e-07, "loss": 0.4784, "step": 2318 }, { "epoch": 0.37415295256534364, "grad_norm": 1.7104711724477857, "learning_rate": 7.39219956691277e-07, "loss": 0.5547, "step": 2319 }, { "epoch": 0.37431429493384966, "grad_norm": 1.5034561078052542, "learning_rate": 7.389856583627078e-07, "loss": 0.5449, "step": 2320 }, { "epoch": 0.3744756373023556, "grad_norm": 1.9956184911338017, "learning_rate": 7.387512919985775e-07, "loss": 0.6791, "step": 2321 }, { "epoch": 0.37463697967086157, "grad_norm": 1.6640710964786758, "learning_rate": 7.385168576656065e-07, "loss": 0.5371, "step": 2322 }, { "epoch": 0.3747983220393675, "grad_norm": 1.3462357262976619, "learning_rate": 7.382823554305344e-07, "loss": 0.6012, "step": 2323 }, { "epoch": 0.3749596644078735, "grad_norm": 1.5653177412893104, "learning_rate": 7.380477853601209e-07, "loss": 0.4314, "step": 2324 }, { "epoch": 0.3751210067763795, "grad_norm": 1.6335021392369222, "learning_rate": 7.37813147521144e-07, "loss": 0.4552, "step": 2325 }, { "epoch": 0.37528234914488545, "grad_norm": 1.8367398071380079, "learning_rate": 7.375784419804018e-07, "loss": 0.539, "step": 2326 }, { "epoch": 0.3754436915133914, "grad_norm": 1.7286027961776311, "learning_rate": 7.373436688047113e-07, "loss": 0.5798, "step": 2327 }, { "epoch": 0.37560503388189737, "grad_norm": 1.2118511567861452, "learning_rate": 7.371088280609089e-07, "loss": 0.5732, "step": 2328 }, { "epoch": 0.3757663762504034, "grad_norm": 1.501788662362773, "learning_rate": 7.368739198158501e-07, "loss": 0.5051, "step": 2329 }, { "epoch": 0.37592771861890933, "grad_norm": 1.3656977356978455, "learning_rate": 7.366389441364096e-07, "loss": 0.4152, "step": 2330 }, { "epoch": 0.3760890609874153, "grad_norm": 1.8758858813203265, "learning_rate": 7.364039010894815e-07, "loss": 0.5249, "step": 2331 }, { "epoch": 0.37625040335592125, "grad_norm": 1.2292074369145363, "learning_rate": 7.36168790741979e-07, "loss": 0.5231, "step": 2332 }, { "epoch": 0.37641174572442726, "grad_norm": 1.749826150160676, "learning_rate": 7.359336131608343e-07, "loss": 0.5712, "step": 2333 }, { "epoch": 0.3765730880929332, "grad_norm": 1.7836453577797078, "learning_rate": 7.356983684129989e-07, "loss": 0.6067, "step": 2334 }, { "epoch": 0.3767344304614392, "grad_norm": 1.3466499441512332, "learning_rate": 7.354630565654433e-07, "loss": 0.4299, "step": 2335 }, { "epoch": 0.37689577282994513, "grad_norm": 1.5597054477301793, "learning_rate": 7.352276776851573e-07, "loss": 0.4961, "step": 2336 }, { "epoch": 0.3770571151984511, "grad_norm": 1.715320216832357, "learning_rate": 7.349922318391498e-07, "loss": 0.6941, "step": 2337 }, { "epoch": 0.3772184575669571, "grad_norm": 2.39458262651348, "learning_rate": 7.347567190944484e-07, "loss": 0.7218, "step": 2338 }, { "epoch": 0.37737979993546306, "grad_norm": 1.9099478578690159, "learning_rate": 7.345211395180999e-07, "loss": 0.495, "step": 2339 }, { "epoch": 0.377541142303969, "grad_norm": 1.394557749605976, "learning_rate": 7.342854931771705e-07, "loss": 0.543, "step": 2340 }, { "epoch": 0.37770248467247497, "grad_norm": 1.5264050655338917, "learning_rate": 7.34049780138745e-07, "loss": 0.5899, "step": 2341 }, { "epoch": 0.377863827040981, "grad_norm": 1.5493721890318233, "learning_rate": 7.338140004699271e-07, "loss": 0.5379, "step": 2342 }, { "epoch": 0.37802516940948694, "grad_norm": 1.6658077490430432, "learning_rate": 7.335781542378399e-07, "loss": 0.5772, "step": 2343 }, { "epoch": 0.3781865117779929, "grad_norm": 1.28820305119073, "learning_rate": 7.33342241509625e-07, "loss": 0.4249, "step": 2344 }, { "epoch": 0.37834785414649885, "grad_norm": 1.9037577487038948, "learning_rate": 7.331062623524435e-07, "loss": 0.5771, "step": 2345 }, { "epoch": 0.37850919651500486, "grad_norm": 1.8402011143612502, "learning_rate": 7.328702168334749e-07, "loss": 0.5614, "step": 2346 }, { "epoch": 0.3786705388835108, "grad_norm": 1.7543277361475678, "learning_rate": 7.326341050199174e-07, "loss": 0.5677, "step": 2347 }, { "epoch": 0.3788318812520168, "grad_norm": 1.4152804867052269, "learning_rate": 7.323979269789888e-07, "loss": 0.4261, "step": 2348 }, { "epoch": 0.37899322362052273, "grad_norm": 1.315790275648776, "learning_rate": 7.321616827779253e-07, "loss": 0.541, "step": 2349 }, { "epoch": 0.3791545659890287, "grad_norm": 1.3267663626056294, "learning_rate": 7.31925372483982e-07, "loss": 0.5022, "step": 2350 }, { "epoch": 0.3793159083575347, "grad_norm": 1.484195899197139, "learning_rate": 7.316889961644329e-07, "loss": 0.5841, "step": 2351 }, { "epoch": 0.37947725072604066, "grad_norm": 1.972279971654779, "learning_rate": 7.314525538865705e-07, "loss": 0.5088, "step": 2352 }, { "epoch": 0.3796385930945466, "grad_norm": 3.396335556867688, "learning_rate": 7.312160457177065e-07, "loss": 0.692, "step": 2353 }, { "epoch": 0.3797999354630526, "grad_norm": 1.7632491068684295, "learning_rate": 7.309794717251712e-07, "loss": 0.4857, "step": 2354 }, { "epoch": 0.3799612778315586, "grad_norm": 1.924818250820268, "learning_rate": 7.307428319763132e-07, "loss": 0.5915, "step": 2355 }, { "epoch": 0.38012262020006454, "grad_norm": 1.8020795333322435, "learning_rate": 7.305061265385009e-07, "loss": 0.5267, "step": 2356 }, { "epoch": 0.3802839625685705, "grad_norm": 1.4444624591991884, "learning_rate": 7.3026935547912e-07, "loss": 0.4759, "step": 2357 }, { "epoch": 0.38044530493707646, "grad_norm": 1.3083802917379288, "learning_rate": 7.30032518865576e-07, "loss": 0.5153, "step": 2358 }, { "epoch": 0.38060664730558247, "grad_norm": 1.9397096032145782, "learning_rate": 7.297956167652927e-07, "loss": 0.6831, "step": 2359 }, { "epoch": 0.3807679896740884, "grad_norm": 1.3947946849939432, "learning_rate": 7.295586492457123e-07, "loss": 0.4122, "step": 2360 }, { "epoch": 0.3809293320425944, "grad_norm": 1.6186399709651553, "learning_rate": 7.293216163742959e-07, "loss": 0.4769, "step": 2361 }, { "epoch": 0.38109067441110034, "grad_norm": 2.021365249735771, "learning_rate": 7.290845182185233e-07, "loss": 0.6266, "step": 2362 }, { "epoch": 0.38125201677960635, "grad_norm": 1.3700980894099508, "learning_rate": 7.288473548458925e-07, "loss": 0.5635, "step": 2363 }, { "epoch": 0.3814133591481123, "grad_norm": 1.763849834533765, "learning_rate": 7.286101263239204e-07, "loss": 0.549, "step": 2364 }, { "epoch": 0.38157470151661826, "grad_norm": 1.2951315225363869, "learning_rate": 7.28372832720142e-07, "loss": 0.4045, "step": 2365 }, { "epoch": 0.3817360438851242, "grad_norm": 1.2688378421084174, "learning_rate": 7.281354741021117e-07, "loss": 0.5693, "step": 2366 }, { "epoch": 0.3818973862536302, "grad_norm": 1.365556190555412, "learning_rate": 7.278980505374016e-07, "loss": 0.6512, "step": 2367 }, { "epoch": 0.3820587286221362, "grad_norm": 1.5896790815175104, "learning_rate": 7.276605620936026e-07, "loss": 0.4159, "step": 2368 }, { "epoch": 0.38222007099064215, "grad_norm": 1.8013777334547094, "learning_rate": 7.27423008838324e-07, "loss": 0.558, "step": 2369 }, { "epoch": 0.3823814133591481, "grad_norm": 2.757272035197166, "learning_rate": 7.271853908391934e-07, "loss": 0.5724, "step": 2370 }, { "epoch": 0.38254275572765406, "grad_norm": 1.4878549040224265, "learning_rate": 7.269477081638573e-07, "loss": 0.5263, "step": 2371 }, { "epoch": 0.3827040980961601, "grad_norm": 2.074280129079494, "learning_rate": 7.267099608799803e-07, "loss": 0.6325, "step": 2372 }, { "epoch": 0.38286544046466603, "grad_norm": 1.2826478822304834, "learning_rate": 7.264721490552452e-07, "loss": 0.5452, "step": 2373 }, { "epoch": 0.383026782833172, "grad_norm": 1.3122575626805806, "learning_rate": 7.262342727573535e-07, "loss": 0.4951, "step": 2374 }, { "epoch": 0.38318812520167794, "grad_norm": 1.4143288714497102, "learning_rate": 7.25996332054025e-07, "loss": 0.4377, "step": 2375 }, { "epoch": 0.38334946757018395, "grad_norm": 1.3057354127937144, "learning_rate": 7.257583270129977e-07, "loss": 0.4314, "step": 2376 }, { "epoch": 0.3835108099386899, "grad_norm": 1.8278059069598873, "learning_rate": 7.255202577020282e-07, "loss": 0.5066, "step": 2377 }, { "epoch": 0.38367215230719587, "grad_norm": 1.7924014588194932, "learning_rate": 7.252821241888909e-07, "loss": 0.552, "step": 2378 }, { "epoch": 0.3838334946757018, "grad_norm": 1.7270195817704215, "learning_rate": 7.250439265413788e-07, "loss": 0.4299, "step": 2379 }, { "epoch": 0.3839948370442078, "grad_norm": 1.6810521459621197, "learning_rate": 7.248056648273034e-07, "loss": 0.7027, "step": 2380 }, { "epoch": 0.3841561794127138, "grad_norm": 1.3655935969113775, "learning_rate": 7.245673391144938e-07, "loss": 0.422, "step": 2381 }, { "epoch": 0.38431752178121975, "grad_norm": 1.2955022693586182, "learning_rate": 7.243289494707979e-07, "loss": 0.4269, "step": 2382 }, { "epoch": 0.3844788641497257, "grad_norm": 1.778872727257083, "learning_rate": 7.240904959640816e-07, "loss": 0.6135, "step": 2383 }, { "epoch": 0.38464020651823166, "grad_norm": 2.209497852497909, "learning_rate": 7.238519786622288e-07, "loss": 0.6876, "step": 2384 }, { "epoch": 0.3848015488867377, "grad_norm": 1.6724312828327894, "learning_rate": 7.23613397633142e-07, "loss": 0.4231, "step": 2385 }, { "epoch": 0.38496289125524363, "grad_norm": 1.7771500366619633, "learning_rate": 7.233747529447414e-07, "loss": 0.4771, "step": 2386 }, { "epoch": 0.3851242336237496, "grad_norm": 2.044505137887919, "learning_rate": 7.231360446649653e-07, "loss": 0.6137, "step": 2387 }, { "epoch": 0.38528557599225555, "grad_norm": 1.7719493117046923, "learning_rate": 7.228972728617707e-07, "loss": 0.5604, "step": 2388 }, { "epoch": 0.38544691836076156, "grad_norm": 2.0360892531311854, "learning_rate": 7.22658437603132e-07, "loss": 0.8334, "step": 2389 }, { "epoch": 0.3856082607292675, "grad_norm": 1.130954613241225, "learning_rate": 7.224195389570421e-07, "loss": 0.4401, "step": 2390 }, { "epoch": 0.3857696030977735, "grad_norm": 1.7732199648899392, "learning_rate": 7.221805769915117e-07, "loss": 0.4261, "step": 2391 }, { "epoch": 0.38593094546627943, "grad_norm": 1.5540652155895582, "learning_rate": 7.219415517745695e-07, "loss": 0.457, "step": 2392 }, { "epoch": 0.38609228783478544, "grad_norm": 1.5100385609180962, "learning_rate": 7.217024633742626e-07, "loss": 0.4036, "step": 2393 }, { "epoch": 0.3862536302032914, "grad_norm": 1.6428226828663184, "learning_rate": 7.214633118586557e-07, "loss": 0.6974, "step": 2394 }, { "epoch": 0.38641497257179735, "grad_norm": 1.6431511458924528, "learning_rate": 7.212240972958315e-07, "loss": 0.6914, "step": 2395 }, { "epoch": 0.3865763149403033, "grad_norm": 2.352894250668287, "learning_rate": 7.209848197538907e-07, "loss": 0.7201, "step": 2396 }, { "epoch": 0.38673765730880927, "grad_norm": 1.5137323576625974, "learning_rate": 7.20745479300952e-07, "loss": 0.5635, "step": 2397 }, { "epoch": 0.3868989996773153, "grad_norm": 1.3602621318834904, "learning_rate": 7.205060760051521e-07, "loss": 0.3087, "step": 2398 }, { "epoch": 0.38706034204582124, "grad_norm": 1.7648121811135975, "learning_rate": 7.202666099346454e-07, "loss": 0.608, "step": 2399 }, { "epoch": 0.3872216844143272, "grad_norm": 1.9684263296335736, "learning_rate": 7.200270811576041e-07, "loss": 0.6578, "step": 2400 }, { "epoch": 0.38738302678283315, "grad_norm": 2.201306505664003, "learning_rate": 7.197874897422182e-07, "loss": 0.5176, "step": 2401 }, { "epoch": 0.38754436915133916, "grad_norm": 1.583649963374859, "learning_rate": 7.195478357566963e-07, "loss": 0.6426, "step": 2402 }, { "epoch": 0.3877057115198451, "grad_norm": 1.7679239345775453, "learning_rate": 7.193081192692638e-07, "loss": 0.5696, "step": 2403 }, { "epoch": 0.3878670538883511, "grad_norm": 1.690756093581806, "learning_rate": 7.190683403481645e-07, "loss": 0.4023, "step": 2404 }, { "epoch": 0.38802839625685703, "grad_norm": 1.850710129407969, "learning_rate": 7.188284990616595e-07, "loss": 0.5256, "step": 2405 }, { "epoch": 0.38818973862536305, "grad_norm": 1.643334685100028, "learning_rate": 7.185885954780281e-07, "loss": 0.4858, "step": 2406 }, { "epoch": 0.388351080993869, "grad_norm": 1.4458519006392574, "learning_rate": 7.183486296655674e-07, "loss": 0.6324, "step": 2407 }, { "epoch": 0.38851242336237496, "grad_norm": 2.0347575494551426, "learning_rate": 7.181086016925919e-07, "loss": 0.5186, "step": 2408 }, { "epoch": 0.3886737657308809, "grad_norm": 1.4935008555493812, "learning_rate": 7.178685116274337e-07, "loss": 0.5308, "step": 2409 }, { "epoch": 0.3888351080993869, "grad_norm": 1.7211219373170552, "learning_rate": 7.176283595384428e-07, "loss": 0.5702, "step": 2410 }, { "epoch": 0.3889964504678929, "grad_norm": 1.832910322747941, "learning_rate": 7.17388145493987e-07, "loss": 0.6147, "step": 2411 }, { "epoch": 0.38915779283639884, "grad_norm": 1.378518933589226, "learning_rate": 7.171478695624515e-07, "loss": 0.4668, "step": 2412 }, { "epoch": 0.3893191352049048, "grad_norm": 1.808568834829397, "learning_rate": 7.16907531812239e-07, "loss": 0.6147, "step": 2413 }, { "epoch": 0.38948047757341075, "grad_norm": 1.8926552204692053, "learning_rate": 7.166671323117702e-07, "loss": 0.6189, "step": 2414 }, { "epoch": 0.38964181994191677, "grad_norm": 1.8433969450604244, "learning_rate": 7.164266711294832e-07, "loss": 0.5834, "step": 2415 }, { "epoch": 0.3898031623104227, "grad_norm": 2.926017558151412, "learning_rate": 7.161861483338335e-07, "loss": 0.8961, "step": 2416 }, { "epoch": 0.3899645046789287, "grad_norm": 2.114267510442539, "learning_rate": 7.159455639932944e-07, "loss": 0.4766, "step": 2417 }, { "epoch": 0.39012584704743464, "grad_norm": 1.700639293180192, "learning_rate": 7.15704918176356e-07, "loss": 0.562, "step": 2418 }, { "epoch": 0.39028718941594065, "grad_norm": 1.6083845776051664, "learning_rate": 7.154642109515272e-07, "loss": 0.5496, "step": 2419 }, { "epoch": 0.3904485317844466, "grad_norm": 2.0165655026663996, "learning_rate": 7.152234423873335e-07, "loss": 0.56, "step": 2420 }, { "epoch": 0.39060987415295256, "grad_norm": 1.1275668002215178, "learning_rate": 7.14982612552318e-07, "loss": 0.3928, "step": 2421 }, { "epoch": 0.3907712165214585, "grad_norm": 1.6325600898981623, "learning_rate": 7.14741721515041e-07, "loss": 0.5022, "step": 2422 }, { "epoch": 0.39093255888996453, "grad_norm": 1.524114292149397, "learning_rate": 7.145007693440808e-07, "loss": 0.3846, "step": 2423 }, { "epoch": 0.3910939012584705, "grad_norm": 1.316429387010949, "learning_rate": 7.142597561080325e-07, "loss": 0.5135, "step": 2424 }, { "epoch": 0.39125524362697645, "grad_norm": 1.7074043337606655, "learning_rate": 7.140186818755093e-07, "loss": 0.5992, "step": 2425 }, { "epoch": 0.3914165859954824, "grad_norm": 1.9416547343770176, "learning_rate": 7.13777546715141e-07, "loss": 0.6114, "step": 2426 }, { "epoch": 0.39157792836398836, "grad_norm": 1.2364645064643607, "learning_rate": 7.135363506955753e-07, "loss": 0.3504, "step": 2427 }, { "epoch": 0.39173927073249437, "grad_norm": 1.2331816300750333, "learning_rate": 7.132950938854769e-07, "loss": 0.3838, "step": 2428 }, { "epoch": 0.39190061310100033, "grad_norm": 1.50436909476731, "learning_rate": 7.13053776353528e-07, "loss": 0.5126, "step": 2429 }, { "epoch": 0.3920619554695063, "grad_norm": 1.7225518227409584, "learning_rate": 7.128123981684279e-07, "loss": 0.4669, "step": 2430 }, { "epoch": 0.39222329783801224, "grad_norm": 1.5028053118208016, "learning_rate": 7.125709593988933e-07, "loss": 0.4114, "step": 2431 }, { "epoch": 0.39238464020651825, "grad_norm": 1.4641652888445522, "learning_rate": 7.123294601136582e-07, "loss": 0.4247, "step": 2432 }, { "epoch": 0.3925459825750242, "grad_norm": 1.6548672788249554, "learning_rate": 7.120879003814739e-07, "loss": 0.5012, "step": 2433 }, { "epoch": 0.39270732494353017, "grad_norm": 2.069792023498576, "learning_rate": 7.118462802711084e-07, "loss": 0.5326, "step": 2434 }, { "epoch": 0.3928686673120361, "grad_norm": 1.3869427137446635, "learning_rate": 7.116045998513474e-07, "loss": 0.3761, "step": 2435 }, { "epoch": 0.39303000968054214, "grad_norm": 1.6125012005941963, "learning_rate": 7.113628591909937e-07, "loss": 0.5738, "step": 2436 }, { "epoch": 0.3931913520490481, "grad_norm": 1.9670580299439857, "learning_rate": 7.11121058358867e-07, "loss": 0.6621, "step": 2437 }, { "epoch": 0.39335269441755405, "grad_norm": 1.8032345558350262, "learning_rate": 7.108791974238046e-07, "loss": 0.4857, "step": 2438 }, { "epoch": 0.39351403678606, "grad_norm": 1.7857944007340585, "learning_rate": 7.106372764546604e-07, "loss": 0.7725, "step": 2439 }, { "epoch": 0.39367537915456596, "grad_norm": 1.6157931891430837, "learning_rate": 7.103952955203055e-07, "loss": 0.7244, "step": 2440 }, { "epoch": 0.393836721523072, "grad_norm": 1.8424902022578735, "learning_rate": 7.101532546896283e-07, "loss": 0.3613, "step": 2441 }, { "epoch": 0.39399806389157793, "grad_norm": 1.9208667250218117, "learning_rate": 7.099111540315343e-07, "loss": 0.5948, "step": 2442 }, { "epoch": 0.3941594062600839, "grad_norm": 1.9693031139564756, "learning_rate": 7.096689936149455e-07, "loss": 0.6163, "step": 2443 }, { "epoch": 0.39432074862858985, "grad_norm": 1.7114204874650651, "learning_rate": 7.094267735088015e-07, "loss": 0.5156, "step": 2444 }, { "epoch": 0.39448209099709586, "grad_norm": 2.3244225677111534, "learning_rate": 7.091844937820585e-07, "loss": 0.8896, "step": 2445 }, { "epoch": 0.3946434333656018, "grad_norm": 1.303339598844709, "learning_rate": 7.0894215450369e-07, "loss": 0.5123, "step": 2446 }, { "epoch": 0.39480477573410777, "grad_norm": 5.801570995272193, "learning_rate": 7.086997557426862e-07, "loss": 0.6095, "step": 2447 }, { "epoch": 0.39496611810261373, "grad_norm": 1.790970630142959, "learning_rate": 7.084572975680544e-07, "loss": 0.5729, "step": 2448 }, { "epoch": 0.39512746047111974, "grad_norm": 1.6349410643148452, "learning_rate": 7.082147800488186e-07, "loss": 0.5242, "step": 2449 }, { "epoch": 0.3952888028396257, "grad_norm": 1.9538057895536745, "learning_rate": 7.079722032540199e-07, "loss": 0.7397, "step": 2450 }, { "epoch": 0.39545014520813165, "grad_norm": 1.76885070514095, "learning_rate": 7.077295672527161e-07, "loss": 0.6281, "step": 2451 }, { "epoch": 0.3956114875766376, "grad_norm": 1.362904143983896, "learning_rate": 7.074868721139822e-07, "loss": 0.4987, "step": 2452 }, { "epoch": 0.39577282994514357, "grad_norm": 1.5905451246790003, "learning_rate": 7.072441179069096e-07, "loss": 0.4415, "step": 2453 }, { "epoch": 0.3959341723136496, "grad_norm": 1.5768060743680372, "learning_rate": 7.070013047006067e-07, "loss": 0.5425, "step": 2454 }, { "epoch": 0.39609551468215554, "grad_norm": 1.2717585348656888, "learning_rate": 7.067584325641988e-07, "loss": 0.4679, "step": 2455 }, { "epoch": 0.3962568570506615, "grad_norm": 1.7275854334152436, "learning_rate": 7.065155015668279e-07, "loss": 0.5063, "step": 2456 }, { "epoch": 0.39641819941916745, "grad_norm": 1.2254412926658829, "learning_rate": 7.062725117776527e-07, "loss": 0.3701, "step": 2457 }, { "epoch": 0.39657954178767346, "grad_norm": 2.021432125872164, "learning_rate": 7.060294632658485e-07, "loss": 0.5676, "step": 2458 }, { "epoch": 0.3967408841561794, "grad_norm": 1.676672638388908, "learning_rate": 7.057863561006078e-07, "loss": 0.3866, "step": 2459 }, { "epoch": 0.3969022265246854, "grad_norm": 1.6842942340574798, "learning_rate": 7.055431903511393e-07, "loss": 0.6155, "step": 2460 }, { "epoch": 0.39706356889319133, "grad_norm": 1.6283421992070666, "learning_rate": 7.052999660866687e-07, "loss": 0.4463, "step": 2461 }, { "epoch": 0.39722491126169734, "grad_norm": 1.5987821682817207, "learning_rate": 7.05056683376438e-07, "loss": 0.3308, "step": 2462 }, { "epoch": 0.3973862536302033, "grad_norm": 1.3900933696069775, "learning_rate": 7.048133422897063e-07, "loss": 0.406, "step": 2463 }, { "epoch": 0.39754759599870926, "grad_norm": 1.673596645449236, "learning_rate": 7.045699428957488e-07, "loss": 0.6347, "step": 2464 }, { "epoch": 0.3977089383672152, "grad_norm": 1.5158156442195543, "learning_rate": 7.04326485263858e-07, "loss": 0.5625, "step": 2465 }, { "epoch": 0.3978702807357212, "grad_norm": 1.7736988295203324, "learning_rate": 7.040829694633422e-07, "loss": 0.5949, "step": 2466 }, { "epoch": 0.3980316231042272, "grad_norm": 1.6395673703390456, "learning_rate": 7.038393955635268e-07, "loss": 0.5455, "step": 2467 }, { "epoch": 0.39819296547273314, "grad_norm": 1.839293897877766, "learning_rate": 7.035957636337537e-07, "loss": 0.6032, "step": 2468 }, { "epoch": 0.3983543078412391, "grad_norm": 1.6268055495138178, "learning_rate": 7.033520737433808e-07, "loss": 0.7556, "step": 2469 }, { "epoch": 0.39851565020974505, "grad_norm": 1.6438440622315238, "learning_rate": 7.031083259617832e-07, "loss": 0.5603, "step": 2470 }, { "epoch": 0.39867699257825107, "grad_norm": 1.6778439362782271, "learning_rate": 7.02864520358352e-07, "loss": 0.4955, "step": 2471 }, { "epoch": 0.398838334946757, "grad_norm": 2.147432071202078, "learning_rate": 7.026206570024949e-07, "loss": 0.5639, "step": 2472 }, { "epoch": 0.398999677315263, "grad_norm": 1.7333478893377987, "learning_rate": 7.023767359636364e-07, "loss": 0.5494, "step": 2473 }, { "epoch": 0.39916101968376894, "grad_norm": 1.8096617476393595, "learning_rate": 7.021327573112166e-07, "loss": 0.6341, "step": 2474 }, { "epoch": 0.39932236205227495, "grad_norm": 1.493145380595605, "learning_rate": 7.018887211146928e-07, "loss": 0.4749, "step": 2475 }, { "epoch": 0.3994837044207809, "grad_norm": 1.5587929811035275, "learning_rate": 7.016446274435384e-07, "loss": 0.5592, "step": 2476 }, { "epoch": 0.39964504678928686, "grad_norm": 1.6954584014973313, "learning_rate": 7.014004763672429e-07, "loss": 0.6298, "step": 2477 }, { "epoch": 0.3998063891577928, "grad_norm": 1.8449415462300527, "learning_rate": 7.011562679553126e-07, "loss": 0.4532, "step": 2478 }, { "epoch": 0.39996773152629883, "grad_norm": 1.6661391218057602, "learning_rate": 7.009120022772698e-07, "loss": 0.5169, "step": 2479 }, { "epoch": 0.4001290738948048, "grad_norm": 1.421004395614365, "learning_rate": 7.006676794026532e-07, "loss": 0.3857, "step": 2480 }, { "epoch": 0.40029041626331074, "grad_norm": 1.6862008780867053, "learning_rate": 7.004232994010177e-07, "loss": 0.5862, "step": 2481 }, { "epoch": 0.4004517586318167, "grad_norm": 2.1406931622644616, "learning_rate": 7.001788623419349e-07, "loss": 0.6792, "step": 2482 }, { "epoch": 0.40061310100032266, "grad_norm": 1.6362265345185467, "learning_rate": 6.999343682949918e-07, "loss": 0.6143, "step": 2483 }, { "epoch": 0.40077444336882867, "grad_norm": 1.8269761216806542, "learning_rate": 6.996898173297926e-07, "loss": 0.6186, "step": 2484 }, { "epoch": 0.4009357857373346, "grad_norm": 2.3286299189978856, "learning_rate": 6.994452095159568e-07, "loss": 0.7113, "step": 2485 }, { "epoch": 0.4010971281058406, "grad_norm": 2.067068877100079, "learning_rate": 6.992005449231207e-07, "loss": 0.4425, "step": 2486 }, { "epoch": 0.40125847047434654, "grad_norm": 1.6376278339603432, "learning_rate": 6.989558236209365e-07, "loss": 0.6256, "step": 2487 }, { "epoch": 0.40141981284285255, "grad_norm": 2.212375025263775, "learning_rate": 6.987110456790728e-07, "loss": 0.4986, "step": 2488 }, { "epoch": 0.4015811552113585, "grad_norm": 1.8046716466430301, "learning_rate": 6.984662111672139e-07, "loss": 0.4707, "step": 2489 }, { "epoch": 0.40174249757986447, "grad_norm": 1.3668919271041091, "learning_rate": 6.982213201550605e-07, "loss": 0.4412, "step": 2490 }, { "epoch": 0.4019038399483704, "grad_norm": 1.4746478549408137, "learning_rate": 6.979763727123296e-07, "loss": 0.6861, "step": 2491 }, { "epoch": 0.40206518231687643, "grad_norm": 1.451614661454944, "learning_rate": 6.977313689087535e-07, "loss": 0.451, "step": 2492 }, { "epoch": 0.4022265246853824, "grad_norm": 1.840042913392571, "learning_rate": 6.974863088140812e-07, "loss": 0.6043, "step": 2493 }, { "epoch": 0.40238786705388835, "grad_norm": 2.1680754214325706, "learning_rate": 6.972411924980778e-07, "loss": 0.6782, "step": 2494 }, { "epoch": 0.4025492094223943, "grad_norm": 1.4272394118150975, "learning_rate": 6.96996020030524e-07, "loss": 0.485, "step": 2495 }, { "epoch": 0.4027105517909003, "grad_norm": 1.563466116723186, "learning_rate": 6.967507914812167e-07, "loss": 0.4298, "step": 2496 }, { "epoch": 0.4028718941594063, "grad_norm": 1.576350054306904, "learning_rate": 6.965055069199687e-07, "loss": 0.434, "step": 2497 }, { "epoch": 0.40303323652791223, "grad_norm": 1.5095380485720358, "learning_rate": 6.962601664166085e-07, "loss": 0.4358, "step": 2498 }, { "epoch": 0.4031945788964182, "grad_norm": 1.4070251871388237, "learning_rate": 6.960147700409813e-07, "loss": 0.4564, "step": 2499 }, { "epoch": 0.40335592126492414, "grad_norm": 1.6064759596478224, "learning_rate": 6.957693178629474e-07, "loss": 0.6141, "step": 2500 }, { "epoch": 0.40351726363343016, "grad_norm": 1.5993358485329427, "learning_rate": 6.955238099523831e-07, "loss": 0.4852, "step": 2501 }, { "epoch": 0.4036786060019361, "grad_norm": 2.1786351004772446, "learning_rate": 6.952782463791812e-07, "loss": 0.5284, "step": 2502 }, { "epoch": 0.40383994837044207, "grad_norm": 1.5364123148950233, "learning_rate": 6.950326272132494e-07, "loss": 0.4702, "step": 2503 }, { "epoch": 0.404001290738948, "grad_norm": 1.7720522406737218, "learning_rate": 6.94786952524512e-07, "loss": 0.5702, "step": 2504 }, { "epoch": 0.40416263310745404, "grad_norm": 1.7246496038183894, "learning_rate": 6.94541222382909e-07, "loss": 0.4589, "step": 2505 }, { "epoch": 0.40432397547596, "grad_norm": 1.3751937122874736, "learning_rate": 6.942954368583957e-07, "loss": 0.4025, "step": 2506 }, { "epoch": 0.40448531784446595, "grad_norm": 1.4877139634832175, "learning_rate": 6.940495960209435e-07, "loss": 0.4486, "step": 2507 }, { "epoch": 0.4046466602129719, "grad_norm": 1.681218429892987, "learning_rate": 6.938036999405397e-07, "loss": 0.3835, "step": 2508 }, { "epoch": 0.4048080025814779, "grad_norm": 1.2977114817826656, "learning_rate": 6.935577486871871e-07, "loss": 0.5242, "step": 2509 }, { "epoch": 0.4049693449499839, "grad_norm": 1.4485607041321769, "learning_rate": 6.93311742330904e-07, "loss": 0.6289, "step": 2510 }, { "epoch": 0.40513068731848983, "grad_norm": 1.8455711680879883, "learning_rate": 6.93065680941725e-07, "loss": 0.5366, "step": 2511 }, { "epoch": 0.4052920296869958, "grad_norm": 1.2391010061969725, "learning_rate": 6.928195645896999e-07, "loss": 0.3469, "step": 2512 }, { "epoch": 0.40545337205550175, "grad_norm": 1.3445577189950484, "learning_rate": 6.925733933448943e-07, "loss": 0.5543, "step": 2513 }, { "epoch": 0.40561471442400776, "grad_norm": 1.961832943915499, "learning_rate": 6.923271672773893e-07, "loss": 0.5704, "step": 2514 }, { "epoch": 0.4057760567925137, "grad_norm": 1.5517163280129749, "learning_rate": 6.920808864572817e-07, "loss": 0.4457, "step": 2515 }, { "epoch": 0.4059373991610197, "grad_norm": 1.4694006270133535, "learning_rate": 6.918345509546838e-07, "loss": 0.5137, "step": 2516 }, { "epoch": 0.40609874152952563, "grad_norm": 1.9574499234435876, "learning_rate": 6.915881608397236e-07, "loss": 0.5875, "step": 2517 }, { "epoch": 0.40626008389803164, "grad_norm": 1.4535593183634932, "learning_rate": 6.913417161825449e-07, "loss": 0.4771, "step": 2518 }, { "epoch": 0.4064214262665376, "grad_norm": 1.579247481493431, "learning_rate": 6.910952170533064e-07, "loss": 0.5746, "step": 2519 }, { "epoch": 0.40658276863504356, "grad_norm": 2.1633124308269394, "learning_rate": 6.908486635221826e-07, "loss": 0.7943, "step": 2520 }, { "epoch": 0.4067441110035495, "grad_norm": 1.287383587213077, "learning_rate": 6.906020556593636e-07, "loss": 0.4926, "step": 2521 }, { "epoch": 0.4069054533720555, "grad_norm": 2.2804226354763326, "learning_rate": 6.90355393535055e-07, "loss": 0.655, "step": 2522 }, { "epoch": 0.4070667957405615, "grad_norm": 1.5750472016300563, "learning_rate": 6.901086772194778e-07, "loss": 0.5987, "step": 2523 }, { "epoch": 0.40722813810906744, "grad_norm": 1.8713055134440224, "learning_rate": 6.89861906782868e-07, "loss": 0.6112, "step": 2524 }, { "epoch": 0.4073894804775734, "grad_norm": 1.7930039878158348, "learning_rate": 6.896150822954776e-07, "loss": 0.4725, "step": 2525 }, { "epoch": 0.4075508228460794, "grad_norm": 1.4760197848051049, "learning_rate": 6.893682038275737e-07, "loss": 0.5911, "step": 2526 }, { "epoch": 0.40771216521458536, "grad_norm": 1.8433917069231918, "learning_rate": 6.89121271449439e-07, "loss": 0.4853, "step": 2527 }, { "epoch": 0.4078735075830913, "grad_norm": 1.5927658782536362, "learning_rate": 6.888742852313712e-07, "loss": 0.3986, "step": 2528 }, { "epoch": 0.4080348499515973, "grad_norm": 1.366543340158301, "learning_rate": 6.886272452436834e-07, "loss": 0.5702, "step": 2529 }, { "epoch": 0.40819619232010323, "grad_norm": 1.8302418768871445, "learning_rate": 6.883801515567043e-07, "loss": 0.6031, "step": 2530 }, { "epoch": 0.40835753468860925, "grad_norm": 1.741341492384528, "learning_rate": 6.881330042407776e-07, "loss": 0.7122, "step": 2531 }, { "epoch": 0.4085188770571152, "grad_norm": 1.6157098921233883, "learning_rate": 6.878858033662624e-07, "loss": 0.5309, "step": 2532 }, { "epoch": 0.40868021942562116, "grad_norm": 1.2876291802792748, "learning_rate": 6.87638549003533e-07, "loss": 0.4566, "step": 2533 }, { "epoch": 0.4088415617941271, "grad_norm": 1.698176974447834, "learning_rate": 6.873912412229787e-07, "loss": 0.4294, "step": 2534 }, { "epoch": 0.40900290416263313, "grad_norm": 1.613232997514435, "learning_rate": 6.871438800950049e-07, "loss": 0.5933, "step": 2535 }, { "epoch": 0.4091642465311391, "grad_norm": 1.6453697259846394, "learning_rate": 6.868964656900309e-07, "loss": 0.5354, "step": 2536 }, { "epoch": 0.40932558889964504, "grad_norm": 1.2746304967675122, "learning_rate": 6.86648998078492e-07, "loss": 0.3921, "step": 2537 }, { "epoch": 0.409486931268151, "grad_norm": 1.6184368375580547, "learning_rate": 6.864014773308383e-07, "loss": 0.5762, "step": 2538 }, { "epoch": 0.409648273636657, "grad_norm": 1.74623513556061, "learning_rate": 6.861539035175353e-07, "loss": 0.5479, "step": 2539 }, { "epoch": 0.40980961600516297, "grad_norm": 1.7194957849085404, "learning_rate": 6.859062767090637e-07, "loss": 0.5033, "step": 2540 }, { "epoch": 0.4099709583736689, "grad_norm": 1.5636904187214284, "learning_rate": 6.856585969759188e-07, "loss": 0.4031, "step": 2541 }, { "epoch": 0.4101323007421749, "grad_norm": 1.6587445169457407, "learning_rate": 6.854108643886113e-07, "loss": 0.5801, "step": 2542 }, { "epoch": 0.41029364311068084, "grad_norm": 2.5415811576425784, "learning_rate": 6.851630790176667e-07, "loss": 0.3127, "step": 2543 }, { "epoch": 0.41045498547918685, "grad_norm": 1.5273553794164436, "learning_rate": 6.849152409336259e-07, "loss": 0.3387, "step": 2544 }, { "epoch": 0.4106163278476928, "grad_norm": 2.0786789464133135, "learning_rate": 6.846673502070446e-07, "loss": 0.5425, "step": 2545 }, { "epoch": 0.41077767021619876, "grad_norm": 1.8660776838300766, "learning_rate": 6.844194069084935e-07, "loss": 0.6187, "step": 2546 }, { "epoch": 0.4109390125847047, "grad_norm": 2.1006880859054577, "learning_rate": 6.841714111085583e-07, "loss": 0.5996, "step": 2547 }, { "epoch": 0.41110035495321073, "grad_norm": 1.5137659843476607, "learning_rate": 6.839233628778395e-07, "loss": 0.4857, "step": 2548 }, { "epoch": 0.4112616973217167, "grad_norm": 1.707307980800635, "learning_rate": 6.836752622869527e-07, "loss": 0.5391, "step": 2549 }, { "epoch": 0.41142303969022265, "grad_norm": 1.367228610919292, "learning_rate": 6.834271094065282e-07, "loss": 0.5226, "step": 2550 }, { "epoch": 0.4115843820587286, "grad_norm": 2.089506261081813, "learning_rate": 6.831789043072115e-07, "loss": 0.6408, "step": 2551 }, { "epoch": 0.4117457244272346, "grad_norm": 1.7073900906221067, "learning_rate": 6.829306470596628e-07, "loss": 0.4484, "step": 2552 }, { "epoch": 0.4119070667957406, "grad_norm": 1.3252889534033012, "learning_rate": 6.826823377345572e-07, "loss": 0.3654, "step": 2553 }, { "epoch": 0.41206840916424653, "grad_norm": 1.2394824534594173, "learning_rate": 6.824339764025844e-07, "loss": 0.4761, "step": 2554 }, { "epoch": 0.4122297515327525, "grad_norm": 1.9299442259482515, "learning_rate": 6.821855631344492e-07, "loss": 0.658, "step": 2555 }, { "epoch": 0.41239109390125844, "grad_norm": 1.586361156684267, "learning_rate": 6.819370980008707e-07, "loss": 0.6252, "step": 2556 }, { "epoch": 0.41255243626976446, "grad_norm": 2.0680074275268927, "learning_rate": 6.816885810725837e-07, "loss": 0.5605, "step": 2557 }, { "epoch": 0.4127137786382704, "grad_norm": 1.2949651880629776, "learning_rate": 6.814400124203367e-07, "loss": 0.3507, "step": 2558 }, { "epoch": 0.41287512100677637, "grad_norm": 1.8172923562128986, "learning_rate": 6.811913921148937e-07, "loss": 0.5839, "step": 2559 }, { "epoch": 0.4130364633752823, "grad_norm": 1.7536117884253615, "learning_rate": 6.80942720227033e-07, "loss": 0.3499, "step": 2560 }, { "epoch": 0.41319780574378834, "grad_norm": 1.9351673235247235, "learning_rate": 6.806939968275476e-07, "loss": 0.4639, "step": 2561 }, { "epoch": 0.4133591481122943, "grad_norm": 1.9644670675211127, "learning_rate": 6.804452219872452e-07, "loss": 0.5468, "step": 2562 }, { "epoch": 0.41352049048080025, "grad_norm": 1.703698787801134, "learning_rate": 6.801963957769483e-07, "loss": 0.5875, "step": 2563 }, { "epoch": 0.4136818328493062, "grad_norm": 1.6887245680039715, "learning_rate": 6.799475182674941e-07, "loss": 0.6852, "step": 2564 }, { "epoch": 0.4138431752178122, "grad_norm": 2.1872889825676256, "learning_rate": 6.796985895297338e-07, "loss": 0.9447, "step": 2565 }, { "epoch": 0.4140045175863182, "grad_norm": 2.011819722132267, "learning_rate": 6.794496096345341e-07, "loss": 0.5776, "step": 2566 }, { "epoch": 0.41416585995482413, "grad_norm": 1.0950255449354958, "learning_rate": 6.792005786527752e-07, "loss": 0.3608, "step": 2567 }, { "epoch": 0.4143272023233301, "grad_norm": 1.9716167468246086, "learning_rate": 6.789514966553529e-07, "loss": 0.4757, "step": 2568 }, { "epoch": 0.4144885446918361, "grad_norm": 1.3753077856120426, "learning_rate": 6.787023637131769e-07, "loss": 0.4462, "step": 2569 }, { "epoch": 0.41464988706034206, "grad_norm": 1.6755028695742475, "learning_rate": 6.784531798971713e-07, "loss": 0.434, "step": 2570 }, { "epoch": 0.414811229428848, "grad_norm": 1.9988321232341046, "learning_rate": 6.782039452782752e-07, "loss": 0.5673, "step": 2571 }, { "epoch": 0.414972571797354, "grad_norm": 1.7979234373363726, "learning_rate": 6.77954659927442e-07, "loss": 0.6681, "step": 2572 }, { "epoch": 0.41513391416585993, "grad_norm": 1.5481868537186005, "learning_rate": 6.777053239156388e-07, "loss": 0.5347, "step": 2573 }, { "epoch": 0.41529525653436594, "grad_norm": 2.021446751046308, "learning_rate": 6.774559373138483e-07, "loss": 0.6511, "step": 2574 }, { "epoch": 0.4154565989028719, "grad_norm": 1.2888200936555982, "learning_rate": 6.77206500193067e-07, "loss": 0.433, "step": 2575 }, { "epoch": 0.41561794127137786, "grad_norm": 1.6802001880401807, "learning_rate": 6.769570126243058e-07, "loss": 0.803, "step": 2576 }, { "epoch": 0.4157792836398838, "grad_norm": 1.6266162610582546, "learning_rate": 6.767074746785899e-07, "loss": 0.7554, "step": 2577 }, { "epoch": 0.4159406260083898, "grad_norm": 1.3914528429304103, "learning_rate": 6.764578864269589e-07, "loss": 0.6861, "step": 2578 }, { "epoch": 0.4161019683768958, "grad_norm": 1.3134443428030118, "learning_rate": 6.762082479404669e-07, "loss": 0.4349, "step": 2579 }, { "epoch": 0.41626331074540174, "grad_norm": 1.2821436301007807, "learning_rate": 6.759585592901819e-07, "loss": 0.5467, "step": 2580 }, { "epoch": 0.4164246531139077, "grad_norm": 2.2321443961001948, "learning_rate": 6.757088205471871e-07, "loss": 0.495, "step": 2581 }, { "epoch": 0.4165859954824137, "grad_norm": 1.8578634449203508, "learning_rate": 6.754590317825785e-07, "loss": 0.4891, "step": 2582 }, { "epoch": 0.41674733785091966, "grad_norm": 1.4691005045071421, "learning_rate": 6.752091930674673e-07, "loss": 0.5209, "step": 2583 }, { "epoch": 0.4169086802194256, "grad_norm": 1.296669357725226, "learning_rate": 6.749593044729794e-07, "loss": 0.4775, "step": 2584 }, { "epoch": 0.4170700225879316, "grad_norm": 1.6533146884594982, "learning_rate": 6.747093660702534e-07, "loss": 0.4409, "step": 2585 }, { "epoch": 0.41723136495643753, "grad_norm": 1.8879287662201165, "learning_rate": 6.744593779304435e-07, "loss": 0.5605, "step": 2586 }, { "epoch": 0.41739270732494355, "grad_norm": 1.8377258680967552, "learning_rate": 6.742093401247172e-07, "loss": 0.7019, "step": 2587 }, { "epoch": 0.4175540496934495, "grad_norm": 2.4972487569758703, "learning_rate": 6.739592527242568e-07, "loss": 0.6977, "step": 2588 }, { "epoch": 0.41771539206195546, "grad_norm": 1.6190441733278211, "learning_rate": 6.73709115800258e-07, "loss": 0.5256, "step": 2589 }, { "epoch": 0.4178767344304614, "grad_norm": 1.4537070144504032, "learning_rate": 6.73458929423931e-07, "loss": 0.5002, "step": 2590 }, { "epoch": 0.41803807679896743, "grad_norm": 2.5469409957479447, "learning_rate": 6.732086936665002e-07, "loss": 0.661, "step": 2591 }, { "epoch": 0.4181994191674734, "grad_norm": 2.014635893153505, "learning_rate": 6.729584085992039e-07, "loss": 0.6375, "step": 2592 }, { "epoch": 0.41836076153597934, "grad_norm": 1.7473435675985847, "learning_rate": 6.727080742932943e-07, "loss": 0.5952, "step": 2593 }, { "epoch": 0.4185221039044853, "grad_norm": 1.5852749948785565, "learning_rate": 6.724576908200378e-07, "loss": 0.6048, "step": 2594 }, { "epoch": 0.4186834462729913, "grad_norm": 1.79332448717273, "learning_rate": 6.722072582507146e-07, "loss": 0.4144, "step": 2595 }, { "epoch": 0.41884478864149727, "grad_norm": 1.7152863715501798, "learning_rate": 6.719567766566193e-07, "loss": 0.6162, "step": 2596 }, { "epoch": 0.4190061310100032, "grad_norm": 1.4054567854977948, "learning_rate": 6.717062461090599e-07, "loss": 0.4568, "step": 2597 }, { "epoch": 0.4191674733785092, "grad_norm": 1.5466528068255818, "learning_rate": 6.714556666793589e-07, "loss": 0.4384, "step": 2598 }, { "epoch": 0.4193288157470152, "grad_norm": 2.100937647617374, "learning_rate": 6.712050384388522e-07, "loss": 0.6774, "step": 2599 }, { "epoch": 0.41949015811552115, "grad_norm": 1.51876968873374, "learning_rate": 6.7095436145889e-07, "loss": 0.4139, "step": 2600 }, { "epoch": 0.4196515004840271, "grad_norm": 1.6103643690979927, "learning_rate": 6.70703635810836e-07, "loss": 0.3606, "step": 2601 }, { "epoch": 0.41981284285253306, "grad_norm": 1.7848374898113304, "learning_rate": 6.704528615660681e-07, "loss": 0.4898, "step": 2602 }, { "epoch": 0.419974185221039, "grad_norm": 1.892826658450702, "learning_rate": 6.702020387959778e-07, "loss": 0.6725, "step": 2603 }, { "epoch": 0.42013552758954503, "grad_norm": 1.8490945971786261, "learning_rate": 6.699511675719707e-07, "loss": 0.5866, "step": 2604 }, { "epoch": 0.420296869958051, "grad_norm": 1.3139642994006677, "learning_rate": 6.697002479654657e-07, "loss": 0.532, "step": 2605 }, { "epoch": 0.42045821232655695, "grad_norm": 2.1729415360585334, "learning_rate": 6.69449280047896e-07, "loss": 0.5821, "step": 2606 }, { "epoch": 0.4206195546950629, "grad_norm": 2.0423811875411917, "learning_rate": 6.691982638907083e-07, "loss": 0.4923, "step": 2607 }, { "epoch": 0.4207808970635689, "grad_norm": 1.9670193649564427, "learning_rate": 6.689471995653629e-07, "loss": 0.5416, "step": 2608 }, { "epoch": 0.42094223943207487, "grad_norm": 1.8910424268210462, "learning_rate": 6.686960871433342e-07, "loss": 0.514, "step": 2609 }, { "epoch": 0.42110358180058083, "grad_norm": 2.092457842293924, "learning_rate": 6.684449266961099e-07, "loss": 0.5902, "step": 2610 }, { "epoch": 0.4212649241690868, "grad_norm": 2.047671679890179, "learning_rate": 6.681937182951918e-07, "loss": 0.5434, "step": 2611 }, { "epoch": 0.4214262665375928, "grad_norm": 1.5800789548968386, "learning_rate": 6.679424620120949e-07, "loss": 0.419, "step": 2612 }, { "epoch": 0.42158760890609875, "grad_norm": 1.6097401325120682, "learning_rate": 6.676911579183477e-07, "loss": 0.4892, "step": 2613 }, { "epoch": 0.4217489512746047, "grad_norm": 1.8138496372609823, "learning_rate": 6.67439806085493e-07, "loss": 0.6395, "step": 2614 }, { "epoch": 0.42191029364311067, "grad_norm": 1.9542226529862092, "learning_rate": 6.671884065850869e-07, "loss": 0.6042, "step": 2615 }, { "epoch": 0.4220716360116166, "grad_norm": 1.7466130177919459, "learning_rate": 6.669369594886989e-07, "loss": 0.4668, "step": 2616 }, { "epoch": 0.42223297838012264, "grad_norm": 1.858650066200805, "learning_rate": 6.66685464867912e-07, "loss": 0.6835, "step": 2617 }, { "epoch": 0.4223943207486286, "grad_norm": 1.5989305408891321, "learning_rate": 6.664339227943229e-07, "loss": 0.5903, "step": 2618 }, { "epoch": 0.42255566311713455, "grad_norm": 1.6810266878232334, "learning_rate": 6.661823333395419e-07, "loss": 0.5034, "step": 2619 }, { "epoch": 0.4227170054856405, "grad_norm": 1.7704857952686797, "learning_rate": 6.659306965751927e-07, "loss": 0.5185, "step": 2620 }, { "epoch": 0.4228783478541465, "grad_norm": 1.679827733063291, "learning_rate": 6.656790125729124e-07, "loss": 0.5478, "step": 2621 }, { "epoch": 0.4230396902226525, "grad_norm": 1.991220755784401, "learning_rate": 6.654272814043514e-07, "loss": 0.5866, "step": 2622 }, { "epoch": 0.42320103259115843, "grad_norm": 1.5279127071021734, "learning_rate": 6.651755031411739e-07, "loss": 0.5161, "step": 2623 }, { "epoch": 0.4233623749596644, "grad_norm": 1.9803685872107084, "learning_rate": 6.649236778550572e-07, "loss": 0.511, "step": 2624 }, { "epoch": 0.4235237173281704, "grad_norm": 1.912851316310271, "learning_rate": 6.646718056176922e-07, "loss": 0.6672, "step": 2625 }, { "epoch": 0.42368505969667636, "grad_norm": 1.7557277948386794, "learning_rate": 6.644198865007832e-07, "loss": 0.5841, "step": 2626 }, { "epoch": 0.4238464020651823, "grad_norm": 1.8194519104290758, "learning_rate": 6.641679205760473e-07, "loss": 0.4853, "step": 2627 }, { "epoch": 0.42400774443368827, "grad_norm": 1.8845798539236323, "learning_rate": 6.639159079152159e-07, "loss": 0.7103, "step": 2628 }, { "epoch": 0.4241690868021943, "grad_norm": 2.0185955543607443, "learning_rate": 6.636638485900328e-07, "loss": 0.8153, "step": 2629 }, { "epoch": 0.42433042917070024, "grad_norm": 2.1367322513356006, "learning_rate": 6.634117426722555e-07, "loss": 0.7036, "step": 2630 }, { "epoch": 0.4244917715392062, "grad_norm": 1.5956974473009455, "learning_rate": 6.631595902336547e-07, "loss": 0.5627, "step": 2631 }, { "epoch": 0.42465311390771215, "grad_norm": 1.4681328633545636, "learning_rate": 6.629073913460142e-07, "loss": 0.5773, "step": 2632 }, { "epoch": 0.4248144562762181, "grad_norm": 1.2967408811779209, "learning_rate": 6.626551460811315e-07, "loss": 0.594, "step": 2633 }, { "epoch": 0.4249757986447241, "grad_norm": 1.4811396626128122, "learning_rate": 6.624028545108166e-07, "loss": 0.5371, "step": 2634 }, { "epoch": 0.4251371410132301, "grad_norm": 1.940215084993039, "learning_rate": 6.621505167068935e-07, "loss": 0.5382, "step": 2635 }, { "epoch": 0.42529848338173604, "grad_norm": 1.6037327646115784, "learning_rate": 6.618981327411983e-07, "loss": 0.592, "step": 2636 }, { "epoch": 0.425459825750242, "grad_norm": 1.2603387992492843, "learning_rate": 6.616457026855813e-07, "loss": 0.4231, "step": 2637 }, { "epoch": 0.425621168118748, "grad_norm": 1.765981047523212, "learning_rate": 6.613932266119055e-07, "loss": 0.5711, "step": 2638 }, { "epoch": 0.42578251048725396, "grad_norm": 1.5426073641021245, "learning_rate": 6.611407045920469e-07, "loss": 0.5212, "step": 2639 }, { "epoch": 0.4259438528557599, "grad_norm": 1.8569798791028465, "learning_rate": 6.608881366978943e-07, "loss": 0.7334, "step": 2640 }, { "epoch": 0.4261051952242659, "grad_norm": 2.1427267852369503, "learning_rate": 6.606355230013505e-07, "loss": 0.6606, "step": 2641 }, { "epoch": 0.4262665375927719, "grad_norm": 1.9560424987138962, "learning_rate": 6.603828635743302e-07, "loss": 0.8203, "step": 2642 }, { "epoch": 0.42642787996127784, "grad_norm": 1.6316421914270072, "learning_rate": 6.601301584887623e-07, "loss": 0.5218, "step": 2643 }, { "epoch": 0.4265892223297838, "grad_norm": 1.6728806367876234, "learning_rate": 6.598774078165875e-07, "loss": 0.5301, "step": 2644 }, { "epoch": 0.42675056469828976, "grad_norm": 1.3637818562312296, "learning_rate": 6.596246116297603e-07, "loss": 0.5375, "step": 2645 }, { "epoch": 0.4269119070667957, "grad_norm": 1.7818544265806409, "learning_rate": 6.593717700002479e-07, "loss": 0.4794, "step": 2646 }, { "epoch": 0.4270732494353017, "grad_norm": 1.21309931582934, "learning_rate": 6.591188830000305e-07, "loss": 0.5155, "step": 2647 }, { "epoch": 0.4272345918038077, "grad_norm": 1.3743363426083646, "learning_rate": 6.588659507011009e-07, "loss": 0.4426, "step": 2648 }, { "epoch": 0.42739593417231364, "grad_norm": 1.7555801797634094, "learning_rate": 6.586129731754654e-07, "loss": 0.6296, "step": 2649 }, { "epoch": 0.4275572765408196, "grad_norm": 1.424793894402018, "learning_rate": 6.583599504951424e-07, "loss": 0.3684, "step": 2650 }, { "epoch": 0.4277186189093256, "grad_norm": 1.586265642766875, "learning_rate": 6.581068827321643e-07, "loss": 0.6088, "step": 2651 }, { "epoch": 0.42787996127783157, "grad_norm": 3.3598805601598456, "learning_rate": 6.578537699585749e-07, "loss": 0.4306, "step": 2652 }, { "epoch": 0.4280413036463375, "grad_norm": 2.200948952706408, "learning_rate": 6.576006122464318e-07, "loss": 0.5324, "step": 2653 }, { "epoch": 0.4282026460148435, "grad_norm": 1.590166063933456, "learning_rate": 6.573474096678051e-07, "loss": 0.5483, "step": 2654 }, { "epoch": 0.4283639883833495, "grad_norm": 1.538781929583508, "learning_rate": 6.57094162294778e-07, "loss": 0.3944, "step": 2655 }, { "epoch": 0.42852533075185545, "grad_norm": 1.6905002061403156, "learning_rate": 6.568408701994458e-07, "loss": 0.4994, "step": 2656 }, { "epoch": 0.4286866731203614, "grad_norm": 1.6963882900417642, "learning_rate": 6.565875334539169e-07, "loss": 0.5577, "step": 2657 }, { "epoch": 0.42884801548886736, "grad_norm": 2.1459334884034993, "learning_rate": 6.563341521303125e-07, "loss": 0.5188, "step": 2658 }, { "epoch": 0.4290093578573733, "grad_norm": 1.7472837348680028, "learning_rate": 6.560807263007663e-07, "loss": 0.5606, "step": 2659 }, { "epoch": 0.42917070022587933, "grad_norm": 1.8709992641730928, "learning_rate": 6.558272560374249e-07, "loss": 0.6041, "step": 2660 }, { "epoch": 0.4293320425943853, "grad_norm": 1.9397831661920466, "learning_rate": 6.555737414124474e-07, "loss": 0.601, "step": 2661 }, { "epoch": 0.42949338496289124, "grad_norm": 1.3685276983187713, "learning_rate": 6.553201824980053e-07, "loss": 0.6364, "step": 2662 }, { "epoch": 0.4296547273313972, "grad_norm": 1.525295279010391, "learning_rate": 6.550665793662832e-07, "loss": 0.5107, "step": 2663 }, { "epoch": 0.4298160696999032, "grad_norm": 1.6080520525568505, "learning_rate": 6.54812932089478e-07, "loss": 0.483, "step": 2664 }, { "epoch": 0.42997741206840917, "grad_norm": 1.2990972483899081, "learning_rate": 6.54559240739799e-07, "loss": 0.3627, "step": 2665 }, { "epoch": 0.4301387544369151, "grad_norm": 1.8556443944414098, "learning_rate": 6.543055053894685e-07, "loss": 0.5889, "step": 2666 }, { "epoch": 0.4303000968054211, "grad_norm": 1.7630862322970078, "learning_rate": 6.540517261107208e-07, "loss": 0.674, "step": 2667 }, { "epoch": 0.4304614391739271, "grad_norm": 1.4233914347961354, "learning_rate": 6.537979029758033e-07, "loss": 0.5082, "step": 2668 }, { "epoch": 0.43062278154243305, "grad_norm": 1.6406712116817643, "learning_rate": 6.535440360569755e-07, "loss": 0.6177, "step": 2669 }, { "epoch": 0.430784123910939, "grad_norm": 1.7148520919145052, "learning_rate": 6.532901254265092e-07, "loss": 0.5627, "step": 2670 }, { "epoch": 0.43094546627944497, "grad_norm": 2.4129023557368234, "learning_rate": 6.53036171156689e-07, "loss": 0.5156, "step": 2671 }, { "epoch": 0.431106808647951, "grad_norm": 1.3407565641425672, "learning_rate": 6.527821733198116e-07, "loss": 0.4967, "step": 2672 }, { "epoch": 0.43126815101645694, "grad_norm": 1.7237234810386097, "learning_rate": 6.525281319881868e-07, "loss": 0.6015, "step": 2673 }, { "epoch": 0.4314294933849629, "grad_norm": 1.6087348924237215, "learning_rate": 6.52274047234136e-07, "loss": 0.571, "step": 2674 }, { "epoch": 0.43159083575346885, "grad_norm": 1.4601624416752752, "learning_rate": 6.520199191299931e-07, "loss": 0.448, "step": 2675 }, { "epoch": 0.4317521781219748, "grad_norm": 1.5785264363870897, "learning_rate": 6.517657477481046e-07, "loss": 0.5205, "step": 2676 }, { "epoch": 0.4319135204904808, "grad_norm": 1.6945253293825222, "learning_rate": 6.51511533160829e-07, "loss": 0.5763, "step": 2677 }, { "epoch": 0.4320748628589868, "grad_norm": 1.6012549128222533, "learning_rate": 6.512572754405379e-07, "loss": 0.4942, "step": 2678 }, { "epoch": 0.43223620522749273, "grad_norm": 1.5768148441459815, "learning_rate": 6.510029746596141e-07, "loss": 0.3097, "step": 2679 }, { "epoch": 0.4323975475959987, "grad_norm": 1.4356157143624266, "learning_rate": 6.507486308904531e-07, "loss": 0.4322, "step": 2680 }, { "epoch": 0.4325588899645047, "grad_norm": 2.427671031171563, "learning_rate": 6.504942442054629e-07, "loss": 0.6598, "step": 2681 }, { "epoch": 0.43272023233301066, "grad_norm": 2.0752197838630657, "learning_rate": 6.502398146770633e-07, "loss": 0.6622, "step": 2682 }, { "epoch": 0.4328815747015166, "grad_norm": 1.8090448661415264, "learning_rate": 6.499853423776869e-07, "loss": 0.438, "step": 2683 }, { "epoch": 0.43304291707002257, "grad_norm": 2.064039638256954, "learning_rate": 6.497308273797777e-07, "loss": 0.6325, "step": 2684 }, { "epoch": 0.4332042594385286, "grad_norm": 1.7288242215478762, "learning_rate": 6.49476269755792e-07, "loss": 0.4655, "step": 2685 }, { "epoch": 0.43336560180703454, "grad_norm": 1.6685487690154968, "learning_rate": 6.492216695781991e-07, "loss": 0.6551, "step": 2686 }, { "epoch": 0.4335269441755405, "grad_norm": 1.320932457189814, "learning_rate": 6.489670269194794e-07, "loss": 0.4827, "step": 2687 }, { "epoch": 0.43368828654404645, "grad_norm": 1.4904726570382913, "learning_rate": 6.487123418521259e-07, "loss": 0.4298, "step": 2688 }, { "epoch": 0.4338496289125524, "grad_norm": 1.552270745690334, "learning_rate": 6.484576144486432e-07, "loss": 0.6798, "step": 2689 }, { "epoch": 0.4340109712810584, "grad_norm": 1.9878365669538693, "learning_rate": 6.482028447815488e-07, "loss": 0.6274, "step": 2690 }, { "epoch": 0.4341723136495644, "grad_norm": 1.4271580567485371, "learning_rate": 6.479480329233714e-07, "loss": 0.5542, "step": 2691 }, { "epoch": 0.43433365601807034, "grad_norm": 1.291130949516523, "learning_rate": 6.476931789466522e-07, "loss": 0.5473, "step": 2692 }, { "epoch": 0.4344949983865763, "grad_norm": 1.7056296593758644, "learning_rate": 6.47438282923944e-07, "loss": 0.7688, "step": 2693 }, { "epoch": 0.4346563407550823, "grad_norm": 1.7648970867336329, "learning_rate": 6.471833449278119e-07, "loss": 0.7135, "step": 2694 }, { "epoch": 0.43481768312358826, "grad_norm": 1.7382440798782228, "learning_rate": 6.46928365030833e-07, "loss": 0.5102, "step": 2695 }, { "epoch": 0.4349790254920942, "grad_norm": 1.6843078231862667, "learning_rate": 6.466733433055962e-07, "loss": 0.5715, "step": 2696 }, { "epoch": 0.4351403678606002, "grad_norm": 1.4359001711039465, "learning_rate": 6.464182798247021e-07, "loss": 0.5009, "step": 2697 }, { "epoch": 0.4353017102291062, "grad_norm": 1.2085181401871625, "learning_rate": 6.461631746607634e-07, "loss": 0.4695, "step": 2698 }, { "epoch": 0.43546305259761214, "grad_norm": 1.2214819780941666, "learning_rate": 6.459080278864047e-07, "loss": 0.5524, "step": 2699 }, { "epoch": 0.4356243949661181, "grad_norm": 1.6551018099860406, "learning_rate": 6.456528395742622e-07, "loss": 0.5389, "step": 2700 }, { "epoch": 0.43578573733462406, "grad_norm": 2.2013903342749352, "learning_rate": 6.453976097969844e-07, "loss": 0.6446, "step": 2701 }, { "epoch": 0.43594707970313007, "grad_norm": 2.3914317845324424, "learning_rate": 6.451423386272311e-07, "loss": 0.734, "step": 2702 }, { "epoch": 0.436108422071636, "grad_norm": 1.2333310698582953, "learning_rate": 6.448870261376743e-07, "loss": 0.4189, "step": 2703 }, { "epoch": 0.436269764440142, "grad_norm": 1.822104754609523, "learning_rate": 6.446316724009974e-07, "loss": 0.4811, "step": 2704 }, { "epoch": 0.43643110680864794, "grad_norm": 1.75577104492698, "learning_rate": 6.443762774898956e-07, "loss": 0.5239, "step": 2705 }, { "epoch": 0.4365924491771539, "grad_norm": 1.3661740279648376, "learning_rate": 6.441208414770763e-07, "loss": 0.4483, "step": 2706 }, { "epoch": 0.4367537915456599, "grad_norm": 1.282685197096253, "learning_rate": 6.438653644352578e-07, "loss": 0.5571, "step": 2707 }, { "epoch": 0.43691513391416587, "grad_norm": 2.435931850620103, "learning_rate": 6.436098464371706e-07, "loss": 0.5764, "step": 2708 }, { "epoch": 0.4370764762826718, "grad_norm": 1.4917800906371592, "learning_rate": 6.433542875555571e-07, "loss": 0.3768, "step": 2709 }, { "epoch": 0.4372378186511778, "grad_norm": 1.4544807590380373, "learning_rate": 6.430986878631707e-07, "loss": 0.4861, "step": 2710 }, { "epoch": 0.4373991610196838, "grad_norm": 1.8239202153006109, "learning_rate": 6.428430474327767e-07, "loss": 0.5029, "step": 2711 }, { "epoch": 0.43756050338818975, "grad_norm": 1.8844849056247672, "learning_rate": 6.425873663371521e-07, "loss": 0.5609, "step": 2712 }, { "epoch": 0.4377218457566957, "grad_norm": 1.551192298818105, "learning_rate": 6.423316446490854e-07, "loss": 0.4783, "step": 2713 }, { "epoch": 0.43788318812520166, "grad_norm": 1.7491481615129327, "learning_rate": 6.420758824413768e-07, "loss": 0.4844, "step": 2714 }, { "epoch": 0.4380445304937077, "grad_norm": 1.1863649364458997, "learning_rate": 6.418200797868377e-07, "loss": 0.5294, "step": 2715 }, { "epoch": 0.43820587286221363, "grad_norm": 1.2906133397201403, "learning_rate": 6.415642367582911e-07, "loss": 0.3662, "step": 2716 }, { "epoch": 0.4383672152307196, "grad_norm": 1.6502778686287691, "learning_rate": 6.413083534285717e-07, "loss": 0.5673, "step": 2717 }, { "epoch": 0.43852855759922554, "grad_norm": 1.2769298757897107, "learning_rate": 6.410524298705258e-07, "loss": 0.3591, "step": 2718 }, { "epoch": 0.4386898999677315, "grad_norm": 1.66615056948174, "learning_rate": 6.407964661570108e-07, "loss": 0.5366, "step": 2719 }, { "epoch": 0.4388512423362375, "grad_norm": 1.440589983797981, "learning_rate": 6.405404623608954e-07, "loss": 0.6044, "step": 2720 }, { "epoch": 0.43901258470474347, "grad_norm": 1.956196010929288, "learning_rate": 6.402844185550602e-07, "loss": 0.6514, "step": 2721 }, { "epoch": 0.4391739270732494, "grad_norm": 1.623032405710737, "learning_rate": 6.40028334812397e-07, "loss": 0.5217, "step": 2722 }, { "epoch": 0.4393352694417554, "grad_norm": 1.8874215343409821, "learning_rate": 6.397722112058087e-07, "loss": 0.5863, "step": 2723 }, { "epoch": 0.4394966118102614, "grad_norm": 1.8787179325130978, "learning_rate": 6.3951604780821e-07, "loss": 0.5164, "step": 2724 }, { "epoch": 0.43965795417876735, "grad_norm": 1.6123952580226326, "learning_rate": 6.392598446925265e-07, "loss": 0.4366, "step": 2725 }, { "epoch": 0.4398192965472733, "grad_norm": 1.6683912970387942, "learning_rate": 6.390036019316956e-07, "loss": 0.5383, "step": 2726 }, { "epoch": 0.43998063891577927, "grad_norm": 1.465811447940176, "learning_rate": 6.387473195986654e-07, "loss": 0.4763, "step": 2727 }, { "epoch": 0.4401419812842853, "grad_norm": 1.4909806888795765, "learning_rate": 6.384909977663956e-07, "loss": 0.4283, "step": 2728 }, { "epoch": 0.44030332365279123, "grad_norm": 1.5167275577241093, "learning_rate": 6.382346365078572e-07, "loss": 0.472, "step": 2729 }, { "epoch": 0.4404646660212972, "grad_norm": 1.6103794703890486, "learning_rate": 6.379782358960324e-07, "loss": 0.4551, "step": 2730 }, { "epoch": 0.44062600838980315, "grad_norm": 1.5712348165210925, "learning_rate": 6.377217960039143e-07, "loss": 0.6083, "step": 2731 }, { "epoch": 0.4407873507583091, "grad_norm": 1.4746149530368422, "learning_rate": 6.374653169045076e-07, "loss": 0.5086, "step": 2732 }, { "epoch": 0.4409486931268151, "grad_norm": 1.131739667430282, "learning_rate": 6.37208798670828e-07, "loss": 0.4531, "step": 2733 }, { "epoch": 0.4411100354953211, "grad_norm": 2.0786203353364274, "learning_rate": 6.369522413759021e-07, "loss": 0.5431, "step": 2734 }, { "epoch": 0.44127137786382703, "grad_norm": 1.7081458601391446, "learning_rate": 6.366956450927681e-07, "loss": 0.4869, "step": 2735 }, { "epoch": 0.441432720232333, "grad_norm": 1.495133692016799, "learning_rate": 6.36439009894475e-07, "loss": 0.5373, "step": 2736 }, { "epoch": 0.441594062600839, "grad_norm": 1.65792761716688, "learning_rate": 6.361823358540827e-07, "loss": 0.6415, "step": 2737 }, { "epoch": 0.44175540496934496, "grad_norm": 2.1063217105600143, "learning_rate": 6.359256230446625e-07, "loss": 0.5118, "step": 2738 }, { "epoch": 0.4419167473378509, "grad_norm": 1.7528463104735876, "learning_rate": 6.356688715392966e-07, "loss": 0.6189, "step": 2739 }, { "epoch": 0.44207808970635687, "grad_norm": 2.011940716099477, "learning_rate": 6.354120814110783e-07, "loss": 0.5249, "step": 2740 }, { "epoch": 0.4422394320748629, "grad_norm": 1.6580809064021251, "learning_rate": 6.351552527331117e-07, "loss": 0.5131, "step": 2741 }, { "epoch": 0.44240077444336884, "grad_norm": 1.840523304744832, "learning_rate": 6.348983855785121e-07, "loss": 0.516, "step": 2742 }, { "epoch": 0.4425621168118748, "grad_norm": 1.4895438698118273, "learning_rate": 6.346414800204056e-07, "loss": 0.5692, "step": 2743 }, { "epoch": 0.44272345918038075, "grad_norm": 1.737461333908056, "learning_rate": 6.343845361319293e-07, "loss": 0.3829, "step": 2744 }, { "epoch": 0.44288480154888676, "grad_norm": 1.857894051240152, "learning_rate": 6.341275539862313e-07, "loss": 0.5338, "step": 2745 }, { "epoch": 0.4430461439173927, "grad_norm": 1.6842381777422764, "learning_rate": 6.338705336564703e-07, "loss": 0.5435, "step": 2746 }, { "epoch": 0.4432074862858987, "grad_norm": 1.8206566743095767, "learning_rate": 6.336134752158159e-07, "loss": 0.272, "step": 2747 }, { "epoch": 0.44336882865440463, "grad_norm": 1.583029575657194, "learning_rate": 6.333563787374492e-07, "loss": 0.3803, "step": 2748 }, { "epoch": 0.4435301710229106, "grad_norm": 2.3486743312663347, "learning_rate": 6.330992442945612e-07, "loss": 0.695, "step": 2749 }, { "epoch": 0.4436915133914166, "grad_norm": 1.5690135992711713, "learning_rate": 6.328420719603546e-07, "loss": 0.3921, "step": 2750 }, { "epoch": 0.44385285575992256, "grad_norm": 1.6636264570010415, "learning_rate": 6.325848618080418e-07, "loss": 0.5004, "step": 2751 }, { "epoch": 0.4440141981284285, "grad_norm": 1.7412963093160705, "learning_rate": 6.323276139108471e-07, "loss": 0.6646, "step": 2752 }, { "epoch": 0.4441755404969345, "grad_norm": 1.9658063043814427, "learning_rate": 6.320703283420048e-07, "loss": 0.5916, "step": 2753 }, { "epoch": 0.4443368828654405, "grad_norm": 1.6609930106331183, "learning_rate": 6.318130051747604e-07, "loss": 0.6451, "step": 2754 }, { "epoch": 0.44449822523394644, "grad_norm": 1.7449897843419846, "learning_rate": 6.315556444823695e-07, "loss": 0.5641, "step": 2755 }, { "epoch": 0.4446595676024524, "grad_norm": 1.821069977292986, "learning_rate": 6.312982463380991e-07, "loss": 0.5055, "step": 2756 }, { "epoch": 0.44482090997095836, "grad_norm": 1.1682262328212099, "learning_rate": 6.310408108152263e-07, "loss": 0.4735, "step": 2757 }, { "epoch": 0.44498225233946437, "grad_norm": 1.782224221423976, "learning_rate": 6.307833379870393e-07, "loss": 0.4053, "step": 2758 }, { "epoch": 0.4451435947079703, "grad_norm": 1.4244686407598803, "learning_rate": 6.305258279268364e-07, "loss": 0.3935, "step": 2759 }, { "epoch": 0.4453049370764763, "grad_norm": 2.5922641404809497, "learning_rate": 6.302682807079269e-07, "loss": 0.4468, "step": 2760 }, { "epoch": 0.44546627944498224, "grad_norm": 2.0158040758175186, "learning_rate": 6.300106964036306e-07, "loss": 0.5456, "step": 2761 }, { "epoch": 0.4456276218134882, "grad_norm": 2.8227879024478284, "learning_rate": 6.297530750872777e-07, "loss": 0.4923, "step": 2762 }, { "epoch": 0.4457889641819942, "grad_norm": 1.3826474452247617, "learning_rate": 6.294954168322091e-07, "loss": 0.346, "step": 2763 }, { "epoch": 0.44595030655050016, "grad_norm": 1.534734614664086, "learning_rate": 6.292377217117762e-07, "loss": 0.5077, "step": 2764 }, { "epoch": 0.4461116489190061, "grad_norm": 1.5420769154208005, "learning_rate": 6.289799897993406e-07, "loss": 0.4337, "step": 2765 }, { "epoch": 0.4462729912875121, "grad_norm": 1.5196671216768383, "learning_rate": 6.287222211682751e-07, "loss": 0.5948, "step": 2766 }, { "epoch": 0.4464343336560181, "grad_norm": 1.96286013755086, "learning_rate": 6.284644158919622e-07, "loss": 0.5935, "step": 2767 }, { "epoch": 0.44659567602452405, "grad_norm": 1.7993426420661907, "learning_rate": 6.282065740437952e-07, "loss": 0.6901, "step": 2768 }, { "epoch": 0.44675701839303, "grad_norm": 1.906465236832098, "learning_rate": 6.279486956971775e-07, "loss": 0.6571, "step": 2769 }, { "epoch": 0.44691836076153596, "grad_norm": 2.0643760933845714, "learning_rate": 6.276907809255234e-07, "loss": 0.5827, "step": 2770 }, { "epoch": 0.44707970313004197, "grad_norm": 1.3767177515888933, "learning_rate": 6.274328298022573e-07, "loss": 0.4692, "step": 2771 }, { "epoch": 0.44724104549854793, "grad_norm": 1.4344679326554828, "learning_rate": 6.271748424008139e-07, "loss": 0.4715, "step": 2772 }, { "epoch": 0.4474023878670539, "grad_norm": 2.120752465205866, "learning_rate": 6.269168187946378e-07, "loss": 0.6644, "step": 2773 }, { "epoch": 0.44756373023555984, "grad_norm": 2.462534552781954, "learning_rate": 6.266587590571852e-07, "loss": 0.4519, "step": 2774 }, { "epoch": 0.44772507260406585, "grad_norm": 1.446537879904096, "learning_rate": 6.264006632619212e-07, "loss": 0.4841, "step": 2775 }, { "epoch": 0.4478864149725718, "grad_norm": 1.528632827348202, "learning_rate": 6.261425314823219e-07, "loss": 0.4826, "step": 2776 }, { "epoch": 0.44804775734107777, "grad_norm": 1.9089748106783824, "learning_rate": 6.258843637918735e-07, "loss": 0.5966, "step": 2777 }, { "epoch": 0.4482090997095837, "grad_norm": 1.5883253189930795, "learning_rate": 6.256261602640721e-07, "loss": 0.5601, "step": 2778 }, { "epoch": 0.4483704420780897, "grad_norm": 1.723121632104681, "learning_rate": 6.253679209724247e-07, "loss": 0.5123, "step": 2779 }, { "epoch": 0.4485317844465957, "grad_norm": 1.5278247748958793, "learning_rate": 6.251096459904478e-07, "loss": 0.4348, "step": 2780 }, { "epoch": 0.44869312681510165, "grad_norm": 1.728509280090756, "learning_rate": 6.248513353916686e-07, "loss": 0.6717, "step": 2781 }, { "epoch": 0.4488544691836076, "grad_norm": 1.7831289186110362, "learning_rate": 6.245929892496238e-07, "loss": 0.5953, "step": 2782 }, { "epoch": 0.44901581155211356, "grad_norm": 1.6870945337397476, "learning_rate": 6.24334607637861e-07, "loss": 0.7776, "step": 2783 }, { "epoch": 0.4491771539206196, "grad_norm": 1.4879985550852908, "learning_rate": 6.240761906299372e-07, "loss": 0.4174, "step": 2784 }, { "epoch": 0.44933849628912553, "grad_norm": 1.255258466845875, "learning_rate": 6.238177382994198e-07, "loss": 0.2963, "step": 2785 }, { "epoch": 0.4494998386576315, "grad_norm": 1.2157944764605588, "learning_rate": 6.235592507198864e-07, "loss": 0.3984, "step": 2786 }, { "epoch": 0.44966118102613745, "grad_norm": 1.849338080581388, "learning_rate": 6.233007279649243e-07, "loss": 0.5593, "step": 2787 }, { "epoch": 0.44982252339464346, "grad_norm": 1.9713921150643499, "learning_rate": 6.23042170108131e-07, "loss": 0.5373, "step": 2788 }, { "epoch": 0.4499838657631494, "grad_norm": 1.5884144049358975, "learning_rate": 6.227835772231141e-07, "loss": 0.3162, "step": 2789 }, { "epoch": 0.45014520813165537, "grad_norm": 1.4963022589739585, "learning_rate": 6.225249493834909e-07, "loss": 0.5052, "step": 2790 }, { "epoch": 0.45030655050016133, "grad_norm": 1.9067947984981792, "learning_rate": 6.222662866628888e-07, "loss": 0.4347, "step": 2791 }, { "epoch": 0.4504678928686673, "grad_norm": 1.68322027538076, "learning_rate": 6.220075891349451e-07, "loss": 0.443, "step": 2792 }, { "epoch": 0.4506292352371733, "grad_norm": 1.135929266100902, "learning_rate": 6.217488568733072e-07, "loss": 0.4558, "step": 2793 }, { "epoch": 0.45079057760567925, "grad_norm": 1.5839251449555272, "learning_rate": 6.21490089951632e-07, "loss": 0.6258, "step": 2794 }, { "epoch": 0.4509519199741852, "grad_norm": 1.3596311744380698, "learning_rate": 6.212312884435866e-07, "loss": 0.4381, "step": 2795 }, { "epoch": 0.45111326234269117, "grad_norm": 2.6696941554886204, "learning_rate": 6.209724524228477e-07, "loss": 0.6174, "step": 2796 }, { "epoch": 0.4512746047111972, "grad_norm": 1.434394716613494, "learning_rate": 6.207135819631021e-07, "loss": 0.4433, "step": 2797 }, { "epoch": 0.45143594707970314, "grad_norm": 1.7575444674123157, "learning_rate": 6.204546771380462e-07, "loss": 0.3603, "step": 2798 }, { "epoch": 0.4515972894482091, "grad_norm": 1.5590148968673054, "learning_rate": 6.201957380213864e-07, "loss": 0.4851, "step": 2799 }, { "epoch": 0.45175863181671505, "grad_norm": 1.9117799812241725, "learning_rate": 6.199367646868384e-07, "loss": 0.6033, "step": 2800 }, { "epoch": 0.45191997418522106, "grad_norm": 1.8302068349549514, "learning_rate": 6.196777572081283e-07, "loss": 0.3147, "step": 2801 }, { "epoch": 0.452081316553727, "grad_norm": 1.6620195770212383, "learning_rate": 6.194187156589913e-07, "loss": 0.5027, "step": 2802 }, { "epoch": 0.452242658922233, "grad_norm": 1.205112945201967, "learning_rate": 6.191596401131726e-07, "loss": 0.4577, "step": 2803 }, { "epoch": 0.45240400129073893, "grad_norm": 1.753414365445511, "learning_rate": 6.189005306444271e-07, "loss": 0.454, "step": 2804 }, { "epoch": 0.45256534365924495, "grad_norm": 1.121561730300329, "learning_rate": 6.186413873265192e-07, "loss": 0.5075, "step": 2805 }, { "epoch": 0.4527266860277509, "grad_norm": 1.5711587172571817, "learning_rate": 6.183822102332234e-07, "loss": 0.5065, "step": 2806 }, { "epoch": 0.45288802839625686, "grad_norm": 1.8054548341420316, "learning_rate": 6.18122999438323e-07, "loss": 0.4535, "step": 2807 }, { "epoch": 0.4530493707647628, "grad_norm": 1.3914166886518, "learning_rate": 6.178637550156116e-07, "loss": 0.4124, "step": 2808 }, { "epoch": 0.45321071313326877, "grad_norm": 1.7107818393356111, "learning_rate": 6.17604477038892e-07, "loss": 0.5765, "step": 2809 }, { "epoch": 0.4533720555017748, "grad_norm": 2.1263037777154405, "learning_rate": 6.173451655819768e-07, "loss": 0.6805, "step": 2810 }, { "epoch": 0.45353339787028074, "grad_norm": 1.885529263613501, "learning_rate": 6.170858207186879e-07, "loss": 0.5048, "step": 2811 }, { "epoch": 0.4536947402387867, "grad_norm": 1.5195748683288175, "learning_rate": 6.168264425228569e-07, "loss": 0.6326, "step": 2812 }, { "epoch": 0.45385608260729265, "grad_norm": 1.605094921834645, "learning_rate": 6.165670310683246e-07, "loss": 0.6059, "step": 2813 }, { "epoch": 0.45401742497579867, "grad_norm": 2.133305307541977, "learning_rate": 6.163075864289418e-07, "loss": 0.6885, "step": 2814 }, { "epoch": 0.4541787673443046, "grad_norm": 1.6481004704894773, "learning_rate": 6.160481086785682e-07, "loss": 0.6353, "step": 2815 }, { "epoch": 0.4543401097128106, "grad_norm": 1.3188204940829127, "learning_rate": 6.157885978910732e-07, "loss": 0.4383, "step": 2816 }, { "epoch": 0.45450145208131654, "grad_norm": 2.422057212620056, "learning_rate": 6.155290541403356e-07, "loss": 0.7177, "step": 2817 }, { "epoch": 0.45466279444982255, "grad_norm": 1.1287776781408314, "learning_rate": 6.152694775002434e-07, "loss": 0.3906, "step": 2818 }, { "epoch": 0.4548241368183285, "grad_norm": 1.7035396666077889, "learning_rate": 6.150098680446943e-07, "loss": 0.5152, "step": 2819 }, { "epoch": 0.45498547918683446, "grad_norm": 1.7440134239161356, "learning_rate": 6.147502258475948e-07, "loss": 0.5127, "step": 2820 }, { "epoch": 0.4551468215553404, "grad_norm": 1.3206674222907098, "learning_rate": 6.144905509828615e-07, "loss": 0.5284, "step": 2821 }, { "epoch": 0.4553081639238464, "grad_norm": 1.6568695205258275, "learning_rate": 6.142308435244194e-07, "loss": 0.6272, "step": 2822 }, { "epoch": 0.4554695062923524, "grad_norm": 1.5814922403536567, "learning_rate": 6.139711035462038e-07, "loss": 0.4325, "step": 2823 }, { "epoch": 0.45563084866085835, "grad_norm": 1.792722065895351, "learning_rate": 6.137113311221582e-07, "loss": 0.6388, "step": 2824 }, { "epoch": 0.4557921910293643, "grad_norm": 1.5337713075632948, "learning_rate": 6.134515263262362e-07, "loss": 0.4352, "step": 2825 }, { "epoch": 0.45595353339787026, "grad_norm": 1.4829185166561332, "learning_rate": 6.131916892323998e-07, "loss": 0.4677, "step": 2826 }, { "epoch": 0.45611487576637627, "grad_norm": 2.1831109838857587, "learning_rate": 6.129318199146211e-07, "loss": 0.6931, "step": 2827 }, { "epoch": 0.4562762181348822, "grad_norm": 1.6063650186525587, "learning_rate": 6.126719184468807e-07, "loss": 0.5205, "step": 2828 }, { "epoch": 0.4564375605033882, "grad_norm": 1.778832049323808, "learning_rate": 6.124119849031687e-07, "loss": 0.451, "step": 2829 }, { "epoch": 0.45659890287189414, "grad_norm": 1.4958953327446074, "learning_rate": 6.121520193574841e-07, "loss": 0.5107, "step": 2830 }, { "epoch": 0.45676024524040015, "grad_norm": 1.5615300792101952, "learning_rate": 6.118920218838349e-07, "loss": 0.4441, "step": 2831 }, { "epoch": 0.4569215876089061, "grad_norm": 1.2182981680575653, "learning_rate": 6.116319925562388e-07, "loss": 0.3038, "step": 2832 }, { "epoch": 0.45708292997741207, "grad_norm": 2.8894010684821687, "learning_rate": 6.11371931448722e-07, "loss": 0.5707, "step": 2833 }, { "epoch": 0.457244272345918, "grad_norm": 1.344499556385675, "learning_rate": 6.1111183863532e-07, "loss": 0.4963, "step": 2834 }, { "epoch": 0.457405614714424, "grad_norm": 1.881086516009535, "learning_rate": 6.108517141900771e-07, "loss": 0.6385, "step": 2835 }, { "epoch": 0.45756695708293, "grad_norm": 1.7612287766184345, "learning_rate": 6.105915581870468e-07, "loss": 0.4856, "step": 2836 }, { "epoch": 0.45772829945143595, "grad_norm": 1.5623260401208348, "learning_rate": 6.103313707002916e-07, "loss": 0.4636, "step": 2837 }, { "epoch": 0.4578896418199419, "grad_norm": 1.5558665802884801, "learning_rate": 6.100711518038828e-07, "loss": 0.6048, "step": 2838 }, { "epoch": 0.45805098418844786, "grad_norm": 1.6609775082640295, "learning_rate": 6.098109015719009e-07, "loss": 0.5679, "step": 2839 }, { "epoch": 0.4582123265569539, "grad_norm": 1.354493463649025, "learning_rate": 6.095506200784348e-07, "loss": 0.377, "step": 2840 }, { "epoch": 0.45837366892545983, "grad_norm": 1.5245650593608655, "learning_rate": 6.092903073975832e-07, "loss": 0.5093, "step": 2841 }, { "epoch": 0.4585350112939658, "grad_norm": 1.3862167094355011, "learning_rate": 6.090299636034528e-07, "loss": 0.3649, "step": 2842 }, { "epoch": 0.45869635366247175, "grad_norm": 1.1058759714361412, "learning_rate": 6.087695887701594e-07, "loss": 0.451, "step": 2843 }, { "epoch": 0.45885769603097776, "grad_norm": 2.2778644260993595, "learning_rate": 6.085091829718279e-07, "loss": 0.5091, "step": 2844 }, { "epoch": 0.4590190383994837, "grad_norm": 1.670035660642791, "learning_rate": 6.08248746282592e-07, "loss": 0.4463, "step": 2845 }, { "epoch": 0.45918038076798967, "grad_norm": 1.6651185315987813, "learning_rate": 6.079882787765938e-07, "loss": 0.5512, "step": 2846 }, { "epoch": 0.4593417231364956, "grad_norm": 1.491312939066299, "learning_rate": 6.077277805279844e-07, "loss": 0.4734, "step": 2847 }, { "epoch": 0.45950306550500164, "grad_norm": 2.077840799961376, "learning_rate": 6.074672516109237e-07, "loss": 0.493, "step": 2848 }, { "epoch": 0.4596644078735076, "grad_norm": 1.9303407374214605, "learning_rate": 6.072066920995804e-07, "loss": 0.6001, "step": 2849 }, { "epoch": 0.45982575024201355, "grad_norm": 1.3105028943841635, "learning_rate": 6.06946102068132e-07, "loss": 0.4438, "step": 2850 }, { "epoch": 0.4599870926105195, "grad_norm": 1.9304187331287048, "learning_rate": 6.06685481590764e-07, "loss": 0.5372, "step": 2851 }, { "epoch": 0.46014843497902547, "grad_norm": 1.7726546252643778, "learning_rate": 6.064248307416713e-07, "loss": 0.5034, "step": 2852 }, { "epoch": 0.4603097773475315, "grad_norm": 2.488861543310274, "learning_rate": 6.061641495950573e-07, "loss": 0.5533, "step": 2853 }, { "epoch": 0.46047111971603744, "grad_norm": 1.999890622485977, "learning_rate": 6.059034382251338e-07, "loss": 0.6654, "step": 2854 }, { "epoch": 0.4606324620845434, "grad_norm": 1.776050877104413, "learning_rate": 6.056426967061214e-07, "loss": 0.4916, "step": 2855 }, { "epoch": 0.46079380445304935, "grad_norm": 2.1261091142790183, "learning_rate": 6.053819251122493e-07, "loss": 0.6574, "step": 2856 }, { "epoch": 0.46095514682155536, "grad_norm": 1.8874745878820496, "learning_rate": 6.051211235177551e-07, "loss": 0.6789, "step": 2857 }, { "epoch": 0.4611164891900613, "grad_norm": 1.570485878857533, "learning_rate": 6.048602919968849e-07, "loss": 0.3106, "step": 2858 }, { "epoch": 0.4612778315585673, "grad_norm": 1.5191110064168762, "learning_rate": 6.045994306238937e-07, "loss": 0.3725, "step": 2859 }, { "epoch": 0.46143917392707323, "grad_norm": 1.5133587125647072, "learning_rate": 6.043385394730446e-07, "loss": 0.5258, "step": 2860 }, { "epoch": 0.46160051629557924, "grad_norm": 1.7916854886981077, "learning_rate": 6.040776186186092e-07, "loss": 0.5846, "step": 2861 }, { "epoch": 0.4617618586640852, "grad_norm": 1.929777320902664, "learning_rate": 6.038166681348679e-07, "loss": 0.5372, "step": 2862 }, { "epoch": 0.46192320103259116, "grad_norm": 1.677062711727129, "learning_rate": 6.035556880961092e-07, "loss": 0.5647, "step": 2863 }, { "epoch": 0.4620845434010971, "grad_norm": 1.690455356625587, "learning_rate": 6.032946785766303e-07, "loss": 0.6295, "step": 2864 }, { "epoch": 0.46224588576960307, "grad_norm": 1.5632973734468845, "learning_rate": 6.030336396507364e-07, "loss": 0.4045, "step": 2865 }, { "epoch": 0.4624072281381091, "grad_norm": 1.6717666163028755, "learning_rate": 6.027725713927412e-07, "loss": 0.7016, "step": 2866 }, { "epoch": 0.46256857050661504, "grad_norm": 1.5414779994407997, "learning_rate": 6.025114738769669e-07, "loss": 0.4847, "step": 2867 }, { "epoch": 0.462729912875121, "grad_norm": 1.7182667833249814, "learning_rate": 6.022503471777444e-07, "loss": 0.5309, "step": 2868 }, { "epoch": 0.46289125524362695, "grad_norm": 1.294137984991907, "learning_rate": 6.01989191369412e-07, "loss": 0.4293, "step": 2869 }, { "epoch": 0.46305259761213297, "grad_norm": 1.7776379016976438, "learning_rate": 6.01728006526317e-07, "loss": 0.4186, "step": 2870 }, { "epoch": 0.4632139399806389, "grad_norm": 1.5574775848490177, "learning_rate": 6.014667927228144e-07, "loss": 0.4703, "step": 2871 }, { "epoch": 0.4633752823491449, "grad_norm": 2.1821500478844102, "learning_rate": 6.012055500332681e-07, "loss": 0.6458, "step": 2872 }, { "epoch": 0.46353662471765084, "grad_norm": 1.5897081022013029, "learning_rate": 6.009442785320499e-07, "loss": 0.7031, "step": 2873 }, { "epoch": 0.46369796708615685, "grad_norm": 1.966492221121457, "learning_rate": 6.006829782935396e-07, "loss": 0.483, "step": 2874 }, { "epoch": 0.4638593094546628, "grad_norm": 1.9072571267112222, "learning_rate": 6.004216493921255e-07, "loss": 0.4321, "step": 2875 }, { "epoch": 0.46402065182316876, "grad_norm": 1.4412965810349978, "learning_rate": 6.00160291902204e-07, "loss": 0.3975, "step": 2876 }, { "epoch": 0.4641819941916747, "grad_norm": 1.375206671701595, "learning_rate": 5.998989058981795e-07, "loss": 0.4981, "step": 2877 }, { "epoch": 0.46434333656018073, "grad_norm": 1.7090205771835207, "learning_rate": 5.996374914544644e-07, "loss": 0.3877, "step": 2878 }, { "epoch": 0.4645046789286867, "grad_norm": 2.253919049363842, "learning_rate": 5.993760486454798e-07, "loss": 0.6582, "step": 2879 }, { "epoch": 0.46466602129719264, "grad_norm": 1.573557068601114, "learning_rate": 5.991145775456542e-07, "loss": 0.5707, "step": 2880 }, { "epoch": 0.4648273636656986, "grad_norm": 1.7211405688414274, "learning_rate": 5.988530782294245e-07, "loss": 0.467, "step": 2881 }, { "epoch": 0.46498870603420456, "grad_norm": 1.9100314296513183, "learning_rate": 5.985915507712356e-07, "loss": 0.4083, "step": 2882 }, { "epoch": 0.46515004840271057, "grad_norm": 2.701634078569584, "learning_rate": 5.983299952455403e-07, "loss": 0.5926, "step": 2883 }, { "epoch": 0.4653113907712165, "grad_norm": 1.5627469439869501, "learning_rate": 5.980684117267994e-07, "loss": 0.5457, "step": 2884 }, { "epoch": 0.4654727331397225, "grad_norm": 1.3955688463215647, "learning_rate": 5.978068002894816e-07, "loss": 0.4218, "step": 2885 }, { "epoch": 0.46563407550822844, "grad_norm": 1.475373207981282, "learning_rate": 5.975451610080642e-07, "loss": 0.4455, "step": 2886 }, { "epoch": 0.46579541787673445, "grad_norm": 1.253801097798441, "learning_rate": 5.972834939570313e-07, "loss": 0.4709, "step": 2887 }, { "epoch": 0.4659567602452404, "grad_norm": 1.7542302229626645, "learning_rate": 5.970217992108759e-07, "loss": 0.5325, "step": 2888 }, { "epoch": 0.46611810261374637, "grad_norm": 1.8454255071884607, "learning_rate": 5.967600768440984e-07, "loss": 0.5948, "step": 2889 }, { "epoch": 0.4662794449822523, "grad_norm": 1.680684632037475, "learning_rate": 5.96498326931207e-07, "loss": 0.5003, "step": 2890 }, { "epoch": 0.46644078735075833, "grad_norm": 1.6212801538867627, "learning_rate": 5.962365495467182e-07, "loss": 0.5243, "step": 2891 }, { "epoch": 0.4666021297192643, "grad_norm": 1.390880518380102, "learning_rate": 5.959747447651558e-07, "loss": 0.4258, "step": 2892 }, { "epoch": 0.46676347208777025, "grad_norm": 1.8555122450699737, "learning_rate": 5.957129126610516e-07, "loss": 0.6146, "step": 2893 }, { "epoch": 0.4669248144562762, "grad_norm": 2.197152232317169, "learning_rate": 5.954510533089452e-07, "loss": 0.5743, "step": 2894 }, { "epoch": 0.46708615682478216, "grad_norm": 1.5955622971583276, "learning_rate": 5.95189166783384e-07, "loss": 0.5106, "step": 2895 }, { "epoch": 0.4672474991932882, "grad_norm": 1.9999637004419173, "learning_rate": 5.949272531589232e-07, "loss": 0.4993, "step": 2896 }, { "epoch": 0.46740884156179413, "grad_norm": 2.9854882845082535, "learning_rate": 5.946653125101254e-07, "loss": 0.6165, "step": 2897 }, { "epoch": 0.4675701839303001, "grad_norm": 1.5295148867044455, "learning_rate": 5.944033449115611e-07, "loss": 0.3808, "step": 2898 }, { "epoch": 0.46773152629880604, "grad_norm": 1.8714724736799688, "learning_rate": 5.941413504378088e-07, "loss": 0.4606, "step": 2899 }, { "epoch": 0.46789286866731206, "grad_norm": 1.6082161740470187, "learning_rate": 5.938793291634539e-07, "loss": 0.6084, "step": 2900 }, { "epoch": 0.468054211035818, "grad_norm": 2.001520889886025, "learning_rate": 5.9361728116309e-07, "loss": 0.7256, "step": 2901 }, { "epoch": 0.46821555340432397, "grad_norm": 1.1842199500863406, "learning_rate": 5.93355206511318e-07, "loss": 0.4142, "step": 2902 }, { "epoch": 0.4683768957728299, "grad_norm": 1.3920080615171944, "learning_rate": 5.930931052827471e-07, "loss": 0.4548, "step": 2903 }, { "epoch": 0.46853823814133594, "grad_norm": 1.7072853580130982, "learning_rate": 5.928309775519929e-07, "loss": 0.4064, "step": 2904 }, { "epoch": 0.4686995805098419, "grad_norm": 1.7139229702905552, "learning_rate": 5.925688233936796e-07, "loss": 0.3863, "step": 2905 }, { "epoch": 0.46886092287834785, "grad_norm": 1.9958337663027532, "learning_rate": 5.92306642882438e-07, "loss": 0.4968, "step": 2906 }, { "epoch": 0.4690222652468538, "grad_norm": 1.9300165552209971, "learning_rate": 5.920444360929071e-07, "loss": 0.7085, "step": 2907 }, { "epoch": 0.4691836076153598, "grad_norm": 1.618687841333991, "learning_rate": 5.917822030997335e-07, "loss": 0.6073, "step": 2908 }, { "epoch": 0.4693449499838658, "grad_norm": 1.571586737166143, "learning_rate": 5.915199439775706e-07, "loss": 0.4188, "step": 2909 }, { "epoch": 0.46950629235237173, "grad_norm": 1.263080016122732, "learning_rate": 5.912576588010795e-07, "loss": 0.3919, "step": 2910 }, { "epoch": 0.4696676347208777, "grad_norm": 1.173829242115473, "learning_rate": 5.909953476449288e-07, "loss": 0.4567, "step": 2911 }, { "epoch": 0.46982897708938365, "grad_norm": 1.7700178475207509, "learning_rate": 5.907330105837944e-07, "loss": 0.5101, "step": 2912 }, { "epoch": 0.46999031945788966, "grad_norm": 1.4425456188267463, "learning_rate": 5.904706476923601e-07, "loss": 0.5534, "step": 2913 }, { "epoch": 0.4701516618263956, "grad_norm": 1.6984768354954747, "learning_rate": 5.902082590453162e-07, "loss": 0.4718, "step": 2914 }, { "epoch": 0.4703130041949016, "grad_norm": 1.6422761147324996, "learning_rate": 5.899458447173608e-07, "loss": 0.501, "step": 2915 }, { "epoch": 0.47047434656340753, "grad_norm": 1.3892345978379228, "learning_rate": 5.896834047831993e-07, "loss": 0.5259, "step": 2916 }, { "epoch": 0.47063568893191354, "grad_norm": 1.2135028860201662, "learning_rate": 5.894209393175444e-07, "loss": 0.427, "step": 2917 }, { "epoch": 0.4707970313004195, "grad_norm": 1.6267213507486669, "learning_rate": 5.891584483951156e-07, "loss": 0.5637, "step": 2918 }, { "epoch": 0.47095837366892546, "grad_norm": 1.7355357229633053, "learning_rate": 5.888959320906406e-07, "loss": 0.6218, "step": 2919 }, { "epoch": 0.4711197160374314, "grad_norm": 1.294034121362716, "learning_rate": 5.886333904788534e-07, "loss": 0.514, "step": 2920 }, { "epoch": 0.4712810584059374, "grad_norm": 1.38763870027138, "learning_rate": 5.883708236344959e-07, "loss": 0.4983, "step": 2921 }, { "epoch": 0.4714424007744434, "grad_norm": 1.4643275457647447, "learning_rate": 5.881082316323166e-07, "loss": 0.496, "step": 2922 }, { "epoch": 0.47160374314294934, "grad_norm": 1.3210446285994062, "learning_rate": 5.878456145470716e-07, "loss": 0.6214, "step": 2923 }, { "epoch": 0.4717650855114553, "grad_norm": 1.989713621284106, "learning_rate": 5.875829724535237e-07, "loss": 0.6347, "step": 2924 }, { "epoch": 0.47192642787996125, "grad_norm": 1.8719326360678235, "learning_rate": 5.873203054264433e-07, "loss": 0.6558, "step": 2925 }, { "epoch": 0.47208777024846726, "grad_norm": 1.862490824222364, "learning_rate": 5.870576135406077e-07, "loss": 0.5568, "step": 2926 }, { "epoch": 0.4722491126169732, "grad_norm": 1.6925997498726753, "learning_rate": 5.867948968708012e-07, "loss": 0.5847, "step": 2927 }, { "epoch": 0.4724104549854792, "grad_norm": 1.675151359779401, "learning_rate": 5.865321554918153e-07, "loss": 0.3833, "step": 2928 }, { "epoch": 0.47257179735398513, "grad_norm": 1.283867604441341, "learning_rate": 5.862693894784482e-07, "loss": 0.3642, "step": 2929 }, { "epoch": 0.47273313972249115, "grad_norm": 1.8059595422279155, "learning_rate": 5.860065989055056e-07, "loss": 0.4748, "step": 2930 }, { "epoch": 0.4728944820909971, "grad_norm": 1.4572321776253898, "learning_rate": 5.857437838478e-07, "loss": 0.4259, "step": 2931 }, { "epoch": 0.47305582445950306, "grad_norm": 1.8735803952097847, "learning_rate": 5.854809443801506e-07, "loss": 0.5277, "step": 2932 }, { "epoch": 0.473217166828009, "grad_norm": 1.4553519791320078, "learning_rate": 5.85218080577384e-07, "loss": 0.3828, "step": 2933 }, { "epoch": 0.47337850919651503, "grad_norm": 1.5362749755418377, "learning_rate": 5.849551925143333e-07, "loss": 0.5469, "step": 2934 }, { "epoch": 0.473539851565021, "grad_norm": 1.6163484174911282, "learning_rate": 5.846922802658388e-07, "loss": 0.4539, "step": 2935 }, { "epoch": 0.47370119393352694, "grad_norm": 1.5307286211782727, "learning_rate": 5.844293439067478e-07, "loss": 0.4638, "step": 2936 }, { "epoch": 0.4738625363020329, "grad_norm": 1.668729800555672, "learning_rate": 5.84166383511914e-07, "loss": 0.2897, "step": 2937 }, { "epoch": 0.47402387867053886, "grad_norm": 1.7248290322760327, "learning_rate": 5.839033991561981e-07, "loss": 0.5824, "step": 2938 }, { "epoch": 0.47418522103904487, "grad_norm": 1.9589481065155852, "learning_rate": 5.836403909144682e-07, "loss": 0.4143, "step": 2939 }, { "epoch": 0.4743465634075508, "grad_norm": 1.785291403698053, "learning_rate": 5.833773588615982e-07, "loss": 0.6631, "step": 2940 }, { "epoch": 0.4745079057760568, "grad_norm": 5.1943520023781, "learning_rate": 5.831143030724695e-07, "loss": 0.5882, "step": 2941 }, { "epoch": 0.47466924814456274, "grad_norm": 1.2209579323903552, "learning_rate": 5.8285122362197e-07, "loss": 0.3802, "step": 2942 }, { "epoch": 0.47483059051306875, "grad_norm": 1.735002009146019, "learning_rate": 5.825881205849948e-07, "loss": 0.7127, "step": 2943 }, { "epoch": 0.4749919328815747, "grad_norm": 1.4744847933456267, "learning_rate": 5.823249940364447e-07, "loss": 0.4708, "step": 2944 }, { "epoch": 0.47515327525008066, "grad_norm": 1.5919809433932295, "learning_rate": 5.820618440512283e-07, "loss": 0.4077, "step": 2945 }, { "epoch": 0.4753146176185866, "grad_norm": 1.4610208905741533, "learning_rate": 5.8179867070426e-07, "loss": 0.6073, "step": 2946 }, { "epoch": 0.47547595998709263, "grad_norm": 1.9824787601521348, "learning_rate": 5.815354740704613e-07, "loss": 0.7388, "step": 2947 }, { "epoch": 0.4756373023555986, "grad_norm": 1.748949689391244, "learning_rate": 5.812722542247606e-07, "loss": 0.5087, "step": 2948 }, { "epoch": 0.47579864472410455, "grad_norm": 2.3762716603182374, "learning_rate": 5.810090112420922e-07, "loss": 0.5584, "step": 2949 }, { "epoch": 0.4759599870926105, "grad_norm": 1.5937258400675287, "learning_rate": 5.807457451973975e-07, "loss": 0.4794, "step": 2950 }, { "epoch": 0.4761213294611165, "grad_norm": 1.8851233898829058, "learning_rate": 5.804824561656241e-07, "loss": 0.6258, "step": 2951 }, { "epoch": 0.4762826718296225, "grad_norm": 2.0338011932963753, "learning_rate": 5.802191442217268e-07, "loss": 0.6282, "step": 2952 }, { "epoch": 0.47644401419812843, "grad_norm": 1.5647808974580308, "learning_rate": 5.79955809440666e-07, "loss": 0.5348, "step": 2953 }, { "epoch": 0.4766053565666344, "grad_norm": 1.6164391301804004, "learning_rate": 5.796924518974094e-07, "loss": 0.5854, "step": 2954 }, { "epoch": 0.47676669893514034, "grad_norm": 1.819685209843778, "learning_rate": 5.794290716669307e-07, "loss": 0.5721, "step": 2955 }, { "epoch": 0.47692804130364636, "grad_norm": 1.3999989066800889, "learning_rate": 5.791656688242104e-07, "loss": 0.3899, "step": 2956 }, { "epoch": 0.4770893836721523, "grad_norm": 1.539498436857931, "learning_rate": 5.789022434442351e-07, "loss": 0.4372, "step": 2957 }, { "epoch": 0.47725072604065827, "grad_norm": 1.6910091929622924, "learning_rate": 5.786387956019979e-07, "loss": 0.5027, "step": 2958 }, { "epoch": 0.4774120684091642, "grad_norm": 1.6851371894929235, "learning_rate": 5.783753253724986e-07, "loss": 0.4939, "step": 2959 }, { "epoch": 0.47757341077767024, "grad_norm": 1.5523457742648084, "learning_rate": 5.781118328307431e-07, "loss": 0.3512, "step": 2960 }, { "epoch": 0.4777347531461762, "grad_norm": 1.7726396959186688, "learning_rate": 5.778483180517436e-07, "loss": 0.5744, "step": 2961 }, { "epoch": 0.47789609551468215, "grad_norm": 1.276750899458539, "learning_rate": 5.775847811105188e-07, "loss": 0.5094, "step": 2962 }, { "epoch": 0.4780574378831881, "grad_norm": 1.6619470608952571, "learning_rate": 5.773212220820936e-07, "loss": 0.4447, "step": 2963 }, { "epoch": 0.4782187802516941, "grad_norm": 2.534983295483449, "learning_rate": 5.770576410414991e-07, "loss": 0.5908, "step": 2964 }, { "epoch": 0.4783801226202001, "grad_norm": 1.3651182373594735, "learning_rate": 5.767940380637729e-07, "loss": 0.4428, "step": 2965 }, { "epoch": 0.47854146498870603, "grad_norm": 1.7253773622044095, "learning_rate": 5.765304132239589e-07, "loss": 0.532, "step": 2966 }, { "epoch": 0.478702807357212, "grad_norm": 1.6486391061735692, "learning_rate": 5.762667665971068e-07, "loss": 0.6532, "step": 2967 }, { "epoch": 0.47886414972571795, "grad_norm": 2.1706112265376407, "learning_rate": 5.760030982582728e-07, "loss": 0.6745, "step": 2968 }, { "epoch": 0.47902549209422396, "grad_norm": 1.743846633585269, "learning_rate": 5.757394082825195e-07, "loss": 0.46, "step": 2969 }, { "epoch": 0.4791868344627299, "grad_norm": 5.291611658646433, "learning_rate": 5.754756967449152e-07, "loss": 0.5388, "step": 2970 }, { "epoch": 0.4793481768312359, "grad_norm": 1.8608452249577796, "learning_rate": 5.752119637205345e-07, "loss": 0.5453, "step": 2971 }, { "epoch": 0.47950951919974183, "grad_norm": 1.4698487088466599, "learning_rate": 5.749482092844582e-07, "loss": 0.5657, "step": 2972 }, { "epoch": 0.47967086156824784, "grad_norm": 1.5667834505558416, "learning_rate": 5.746844335117731e-07, "loss": 0.5148, "step": 2973 }, { "epoch": 0.4798322039367538, "grad_norm": 1.378568613358679, "learning_rate": 5.744206364775724e-07, "loss": 0.4676, "step": 2974 }, { "epoch": 0.47999354630525976, "grad_norm": 1.7774395256646622, "learning_rate": 5.741568182569547e-07, "loss": 0.4161, "step": 2975 }, { "epoch": 0.4801548886737657, "grad_norm": 1.6115692143266165, "learning_rate": 5.738929789250252e-07, "loss": 0.4845, "step": 2976 }, { "epoch": 0.4803162310422717, "grad_norm": 1.6379911079398268, "learning_rate": 5.73629118556895e-07, "loss": 0.4894, "step": 2977 }, { "epoch": 0.4804775734107777, "grad_norm": 1.5189174795231075, "learning_rate": 5.733652372276809e-07, "loss": 0.4554, "step": 2978 }, { "epoch": 0.48063891577928364, "grad_norm": 1.304946142817276, "learning_rate": 5.731013350125061e-07, "loss": 0.3216, "step": 2979 }, { "epoch": 0.4808002581477896, "grad_norm": 1.72403030783085, "learning_rate": 5.728374119864994e-07, "loss": 0.5961, "step": 2980 }, { "epoch": 0.4809616005162956, "grad_norm": 1.2889374643431328, "learning_rate": 5.725734682247956e-07, "loss": 0.643, "step": 2981 }, { "epoch": 0.48112294288480156, "grad_norm": 1.351673011451314, "learning_rate": 5.723095038025355e-07, "loss": 0.4704, "step": 2982 }, { "epoch": 0.4812842852533075, "grad_norm": 1.8689635062331276, "learning_rate": 5.720455187948658e-07, "loss": 0.6186, "step": 2983 }, { "epoch": 0.4814456276218135, "grad_norm": 2.0043181057483768, "learning_rate": 5.71781513276939e-07, "loss": 0.612, "step": 2984 }, { "epoch": 0.48160696999031943, "grad_norm": 2.1261999444827517, "learning_rate": 5.715174873239135e-07, "loss": 0.464, "step": 2985 }, { "epoch": 0.48176831235882545, "grad_norm": 2.0151728633063586, "learning_rate": 5.712534410109531e-07, "loss": 0.6655, "step": 2986 }, { "epoch": 0.4819296547273314, "grad_norm": 1.4936597661173459, "learning_rate": 5.709893744132279e-07, "loss": 0.4292, "step": 2987 }, { "epoch": 0.48209099709583736, "grad_norm": 1.9088065716966887, "learning_rate": 5.707252876059139e-07, "loss": 0.6145, "step": 2988 }, { "epoch": 0.4822523394643433, "grad_norm": 1.6780461298134024, "learning_rate": 5.704611806641925e-07, "loss": 0.479, "step": 2989 }, { "epoch": 0.48241368183284933, "grad_norm": 1.4895339459677264, "learning_rate": 5.701970536632507e-07, "loss": 0.5089, "step": 2990 }, { "epoch": 0.4825750242013553, "grad_norm": 1.6982539803357077, "learning_rate": 5.699329066782813e-07, "loss": 0.5373, "step": 2991 }, { "epoch": 0.48273636656986124, "grad_norm": 0.9232744812083805, "learning_rate": 5.696687397844833e-07, "loss": 0.3764, "step": 2992 }, { "epoch": 0.4828977089383672, "grad_norm": 1.9285372498929996, "learning_rate": 5.694045530570606e-07, "loss": 0.7395, "step": 2993 }, { "epoch": 0.4830590513068732, "grad_norm": 1.8277123140298763, "learning_rate": 5.691403465712234e-07, "loss": 0.4369, "step": 2994 }, { "epoch": 0.48322039367537917, "grad_norm": 1.394266610987638, "learning_rate": 5.68876120402187e-07, "loss": 0.3189, "step": 2995 }, { "epoch": 0.4833817360438851, "grad_norm": 1.5466491071893629, "learning_rate": 5.686118746251729e-07, "loss": 0.3702, "step": 2996 }, { "epoch": 0.4835430784123911, "grad_norm": 1.4086611527974262, "learning_rate": 5.683476093154076e-07, "loss": 0.573, "step": 2997 }, { "epoch": 0.48370442078089704, "grad_norm": 1.8951526999033423, "learning_rate": 5.680833245481234e-07, "loss": 0.5425, "step": 2998 }, { "epoch": 0.48386576314940305, "grad_norm": 1.861413367321939, "learning_rate": 5.67819020398558e-07, "loss": 0.5704, "step": 2999 }, { "epoch": 0.484027105517909, "grad_norm": 2.113330438573802, "learning_rate": 5.675546969419549e-07, "loss": 0.7056, "step": 3000 }, { "epoch": 0.48418844788641496, "grad_norm": 1.5489304113105409, "learning_rate": 5.672903542535631e-07, "loss": 0.2757, "step": 3001 }, { "epoch": 0.4843497902549209, "grad_norm": 2.0369891737442827, "learning_rate": 5.670259924086367e-07, "loss": 0.5805, "step": 3002 }, { "epoch": 0.48451113262342693, "grad_norm": 1.3790956966814027, "learning_rate": 5.667616114824356e-07, "loss": 0.4625, "step": 3003 }, { "epoch": 0.4846724749919329, "grad_norm": 2.129230887482777, "learning_rate": 5.664972115502247e-07, "loss": 0.4636, "step": 3004 }, { "epoch": 0.48483381736043885, "grad_norm": 2.023888732791941, "learning_rate": 5.662327926872749e-07, "loss": 0.5055, "step": 3005 }, { "epoch": 0.4849951597289448, "grad_norm": 1.420066773631101, "learning_rate": 5.659683549688623e-07, "loss": 0.3255, "step": 3006 }, { "epoch": 0.4851565020974508, "grad_norm": 1.6297942378053796, "learning_rate": 5.657038984702682e-07, "loss": 0.4883, "step": 3007 }, { "epoch": 0.48531784446595677, "grad_norm": 1.7724519923801487, "learning_rate": 5.65439423266779e-07, "loss": 0.5964, "step": 3008 }, { "epoch": 0.48547918683446273, "grad_norm": 1.4151417528462666, "learning_rate": 5.651749294336871e-07, "loss": 0.4201, "step": 3009 }, { "epoch": 0.4856405292029687, "grad_norm": 1.317262636985261, "learning_rate": 5.649104170462898e-07, "loss": 0.4677, "step": 3010 }, { "epoch": 0.48580187157147464, "grad_norm": 1.804503262185784, "learning_rate": 5.646458861798898e-07, "loss": 0.5804, "step": 3011 }, { "epoch": 0.48596321393998065, "grad_norm": 1.7365295499291395, "learning_rate": 5.643813369097946e-07, "loss": 0.5234, "step": 3012 }, { "epoch": 0.4861245563084866, "grad_norm": 1.5527283866334995, "learning_rate": 5.641167693113177e-07, "loss": 0.5907, "step": 3013 }, { "epoch": 0.48628589867699257, "grad_norm": 1.6879546647921788, "learning_rate": 5.638521834597774e-07, "loss": 0.419, "step": 3014 }, { "epoch": 0.4864472410454985, "grad_norm": 1.7578454586753902, "learning_rate": 5.635875794304971e-07, "loss": 0.6022, "step": 3015 }, { "epoch": 0.48660858341400454, "grad_norm": 1.4649415657445666, "learning_rate": 5.633229572988056e-07, "loss": 0.3461, "step": 3016 }, { "epoch": 0.4867699257825105, "grad_norm": 2.0581028622648088, "learning_rate": 5.630583171400368e-07, "loss": 0.5975, "step": 3017 }, { "epoch": 0.48693126815101645, "grad_norm": 1.693613352313269, "learning_rate": 5.627936590295297e-07, "loss": 0.5189, "step": 3018 }, { "epoch": 0.4870926105195224, "grad_norm": 1.2329852332493978, "learning_rate": 5.625289830426284e-07, "loss": 0.5363, "step": 3019 }, { "epoch": 0.4872539528880284, "grad_norm": 1.3213669668662007, "learning_rate": 5.622642892546822e-07, "loss": 0.1987, "step": 3020 }, { "epoch": 0.4874152952565344, "grad_norm": 1.3941453672303286, "learning_rate": 5.619995777410451e-07, "loss": 0.5759, "step": 3021 }, { "epoch": 0.48757663762504033, "grad_norm": 2.225699167020102, "learning_rate": 5.617348485770766e-07, "loss": 0.602, "step": 3022 }, { "epoch": 0.4877379799935463, "grad_norm": 2.065028288234153, "learning_rate": 5.614701018381412e-07, "loss": 0.7535, "step": 3023 }, { "epoch": 0.4878993223620523, "grad_norm": 1.5292364622536183, "learning_rate": 5.612053375996081e-07, "loss": 0.4752, "step": 3024 }, { "epoch": 0.48806066473055826, "grad_norm": 1.8029853753519218, "learning_rate": 5.609405559368516e-07, "loss": 0.5908, "step": 3025 }, { "epoch": 0.4882220070990642, "grad_norm": 1.554300384632567, "learning_rate": 5.60675756925251e-07, "loss": 0.3581, "step": 3026 }, { "epoch": 0.48838334946757017, "grad_norm": 1.7880937377159534, "learning_rate": 5.604109406401903e-07, "loss": 0.4972, "step": 3027 }, { "epoch": 0.48854469183607613, "grad_norm": 1.7673073914296862, "learning_rate": 5.601461071570591e-07, "loss": 0.5926, "step": 3028 }, { "epoch": 0.48870603420458214, "grad_norm": 2.107743600181589, "learning_rate": 5.598812565512512e-07, "loss": 0.5839, "step": 3029 }, { "epoch": 0.4888673765730881, "grad_norm": 2.2791948859532654, "learning_rate": 5.596163888981655e-07, "loss": 0.739, "step": 3030 }, { "epoch": 0.48902871894159405, "grad_norm": 1.7583156967483862, "learning_rate": 5.593515042732059e-07, "loss": 0.5255, "step": 3031 }, { "epoch": 0.4891900613101, "grad_norm": 1.722282178751211, "learning_rate": 5.590866027517809e-07, "loss": 0.598, "step": 3032 }, { "epoch": 0.489351403678606, "grad_norm": 1.8902065349940154, "learning_rate": 5.588216844093037e-07, "loss": 0.5161, "step": 3033 }, { "epoch": 0.489512746047112, "grad_norm": 1.797958975861751, "learning_rate": 5.585567493211929e-07, "loss": 0.5857, "step": 3034 }, { "epoch": 0.48967408841561794, "grad_norm": 2.108480645362071, "learning_rate": 5.582917975628713e-07, "loss": 0.5926, "step": 3035 }, { "epoch": 0.4898354307841239, "grad_norm": 0.9925392908923802, "learning_rate": 5.580268292097666e-07, "loss": 0.4508, "step": 3036 }, { "epoch": 0.4899967731526299, "grad_norm": 1.8164414925386505, "learning_rate": 5.577618443373112e-07, "loss": 0.3719, "step": 3037 }, { "epoch": 0.49015811552113586, "grad_norm": 1.6990409670376165, "learning_rate": 5.574968430209422e-07, "loss": 0.4843, "step": 3038 }, { "epoch": 0.4903194578896418, "grad_norm": 1.1009627528979977, "learning_rate": 5.572318253361016e-07, "loss": 0.4434, "step": 3039 }, { "epoch": 0.4904808002581478, "grad_norm": 1.5847076758964236, "learning_rate": 5.569667913582357e-07, "loss": 0.6011, "step": 3040 }, { "epoch": 0.49064214262665373, "grad_norm": 1.600869824484892, "learning_rate": 5.567017411627958e-07, "loss": 0.4564, "step": 3041 }, { "epoch": 0.49080348499515974, "grad_norm": 1.4471159534565339, "learning_rate": 5.564366748252374e-07, "loss": 0.5753, "step": 3042 }, { "epoch": 0.4909648273636657, "grad_norm": 1.4732732719733745, "learning_rate": 5.561715924210212e-07, "loss": 0.509, "step": 3043 }, { "epoch": 0.49112616973217166, "grad_norm": 1.7785217405700438, "learning_rate": 5.559064940256116e-07, "loss": 0.5818, "step": 3044 }, { "epoch": 0.4912875121006776, "grad_norm": 1.1201674725287891, "learning_rate": 5.556413797144783e-07, "loss": 0.5152, "step": 3045 }, { "epoch": 0.4914488544691836, "grad_norm": 1.7834507915401918, "learning_rate": 5.553762495630956e-07, "loss": 0.5815, "step": 3046 }, { "epoch": 0.4916101968376896, "grad_norm": 1.570937758127574, "learning_rate": 5.551111036469416e-07, "loss": 0.621, "step": 3047 }, { "epoch": 0.49177153920619554, "grad_norm": 1.7182042035566172, "learning_rate": 5.548459420414993e-07, "loss": 0.5841, "step": 3048 }, { "epoch": 0.4919328815747015, "grad_norm": 1.5453037026427812, "learning_rate": 5.545807648222563e-07, "loss": 0.3981, "step": 3049 }, { "epoch": 0.4920942239432075, "grad_norm": 1.6272211300587327, "learning_rate": 5.543155720647045e-07, "loss": 0.5428, "step": 3050 }, { "epoch": 0.49225556631171347, "grad_norm": 2.090906518248532, "learning_rate": 5.540503638443401e-07, "loss": 0.4844, "step": 3051 }, { "epoch": 0.4924169086802194, "grad_norm": 1.5031561231662474, "learning_rate": 5.53785140236664e-07, "loss": 0.5021, "step": 3052 }, { "epoch": 0.4925782510487254, "grad_norm": 1.3392528420910026, "learning_rate": 5.535199013171812e-07, "loss": 0.3789, "step": 3053 }, { "epoch": 0.4927395934172314, "grad_norm": 1.4771246268988658, "learning_rate": 5.532546471614012e-07, "loss": 0.6084, "step": 3054 }, { "epoch": 0.49290093578573735, "grad_norm": 1.5698769188276973, "learning_rate": 5.529893778448377e-07, "loss": 0.4658, "step": 3055 }, { "epoch": 0.4930622781542433, "grad_norm": 1.345297521137848, "learning_rate": 5.527240934430088e-07, "loss": 0.5101, "step": 3056 }, { "epoch": 0.49322362052274926, "grad_norm": 1.4607661345643719, "learning_rate": 5.52458794031437e-07, "loss": 0.3667, "step": 3057 }, { "epoch": 0.4933849628912552, "grad_norm": 1.6047777605533426, "learning_rate": 5.521934796856491e-07, "loss": 0.4818, "step": 3058 }, { "epoch": 0.49354630525976123, "grad_norm": 1.3008229017032888, "learning_rate": 5.519281504811759e-07, "loss": 0.5706, "step": 3059 }, { "epoch": 0.4937076476282672, "grad_norm": 1.639206045608952, "learning_rate": 5.516628064935526e-07, "loss": 0.5449, "step": 3060 }, { "epoch": 0.49386898999677314, "grad_norm": 1.7056459440550837, "learning_rate": 5.513974477983185e-07, "loss": 0.6651, "step": 3061 }, { "epoch": 0.4940303323652791, "grad_norm": 1.437988032549851, "learning_rate": 5.51132074471017e-07, "loss": 0.6124, "step": 3062 }, { "epoch": 0.4941916747337851, "grad_norm": 1.718118725068258, "learning_rate": 5.508666865871963e-07, "loss": 0.5795, "step": 3063 }, { "epoch": 0.49435301710229107, "grad_norm": 1.8070678205867297, "learning_rate": 5.506012842224081e-07, "loss": 0.5619, "step": 3064 }, { "epoch": 0.494514359470797, "grad_norm": 1.588205754352363, "learning_rate": 5.503358674522082e-07, "loss": 0.4304, "step": 3065 }, { "epoch": 0.494675701839303, "grad_norm": 1.8045459378029796, "learning_rate": 5.500704363521566e-07, "loss": 0.5361, "step": 3066 }, { "epoch": 0.494837044207809, "grad_norm": 2.1177933730429275, "learning_rate": 5.49804990997818e-07, "loss": 0.6939, "step": 3067 }, { "epoch": 0.49499838657631495, "grad_norm": 1.8762127451085704, "learning_rate": 5.495395314647601e-07, "loss": 0.536, "step": 3068 }, { "epoch": 0.4951597289448209, "grad_norm": 1.4748456550922895, "learning_rate": 5.492740578285556e-07, "loss": 0.541, "step": 3069 }, { "epoch": 0.49532107131332687, "grad_norm": 1.4522719802073023, "learning_rate": 5.490085701647804e-07, "loss": 0.6361, "step": 3070 }, { "epoch": 0.4954824136818328, "grad_norm": 2.35941228773955, "learning_rate": 5.487430685490149e-07, "loss": 0.5591, "step": 3071 }, { "epoch": 0.49564375605033884, "grad_norm": 1.4870470591565823, "learning_rate": 5.484775530568434e-07, "loss": 0.5965, "step": 3072 }, { "epoch": 0.4958050984188448, "grad_norm": 1.4491203665434274, "learning_rate": 5.482120237638541e-07, "loss": 0.4582, "step": 3073 }, { "epoch": 0.49596644078735075, "grad_norm": 1.7515096283826876, "learning_rate": 5.479464807456391e-07, "loss": 0.5885, "step": 3074 }, { "epoch": 0.4961277831558567, "grad_norm": 1.4944639566669011, "learning_rate": 5.476809240777945e-07, "loss": 0.3692, "step": 3075 }, { "epoch": 0.4962891255243627, "grad_norm": 1.3098174792820938, "learning_rate": 5.474153538359201e-07, "loss": 0.4445, "step": 3076 }, { "epoch": 0.4964504678928687, "grad_norm": 2.2175076727320446, "learning_rate": 5.471497700956199e-07, "loss": 0.5805, "step": 3077 }, { "epoch": 0.49661181026137463, "grad_norm": 1.448164811128357, "learning_rate": 5.468841729325013e-07, "loss": 0.4511, "step": 3078 }, { "epoch": 0.4967731526298806, "grad_norm": 1.421342687209831, "learning_rate": 5.466185624221757e-07, "loss": 0.6715, "step": 3079 }, { "epoch": 0.4969344949983866, "grad_norm": 1.4324806319919667, "learning_rate": 5.463529386402586e-07, "loss": 0.4759, "step": 3080 }, { "epoch": 0.49709583736689256, "grad_norm": 1.517024308610257, "learning_rate": 5.460873016623689e-07, "loss": 0.4345, "step": 3081 }, { "epoch": 0.4972571797353985, "grad_norm": 1.7846909465432557, "learning_rate": 5.458216515641296e-07, "loss": 0.4208, "step": 3082 }, { "epoch": 0.49741852210390447, "grad_norm": 1.8919410853820582, "learning_rate": 5.455559884211668e-07, "loss": 0.5828, "step": 3083 }, { "epoch": 0.4975798644724105, "grad_norm": 1.7772183216817068, "learning_rate": 5.452903123091108e-07, "loss": 0.5413, "step": 3084 }, { "epoch": 0.49774120684091644, "grad_norm": 1.8442395093861743, "learning_rate": 5.450246233035958e-07, "loss": 0.4269, "step": 3085 }, { "epoch": 0.4979025492094224, "grad_norm": 1.7728735745576294, "learning_rate": 5.447589214802593e-07, "loss": 0.6226, "step": 3086 }, { "epoch": 0.49806389157792835, "grad_norm": 2.429166780954907, "learning_rate": 5.444932069147426e-07, "loss": 0.6581, "step": 3087 }, { "epoch": 0.4982252339464343, "grad_norm": 1.2454070587825612, "learning_rate": 5.442274796826903e-07, "loss": 0.5045, "step": 3088 }, { "epoch": 0.4983865763149403, "grad_norm": 1.2852084256511886, "learning_rate": 5.439617398597513e-07, "loss": 0.4482, "step": 3089 }, { "epoch": 0.4985479186834463, "grad_norm": 1.6737081869769916, "learning_rate": 5.436959875215773e-07, "loss": 0.7206, "step": 3090 }, { "epoch": 0.49870926105195224, "grad_norm": 1.5912192952564124, "learning_rate": 5.43430222743824e-07, "loss": 0.5253, "step": 3091 }, { "epoch": 0.4988706034204582, "grad_norm": 1.343784021346096, "learning_rate": 5.431644456021507e-07, "loss": 0.4796, "step": 3092 }, { "epoch": 0.4990319457889642, "grad_norm": 1.8390416302140966, "learning_rate": 5.428986561722199e-07, "loss": 0.5614, "step": 3093 }, { "epoch": 0.49919328815747016, "grad_norm": 1.7392477370435637, "learning_rate": 5.426328545296979e-07, "loss": 0.3454, "step": 3094 }, { "epoch": 0.4993546305259761, "grad_norm": 1.1588269977027885, "learning_rate": 5.423670407502545e-07, "loss": 0.5196, "step": 3095 }, { "epoch": 0.4995159728944821, "grad_norm": 1.8001214595717745, "learning_rate": 5.421012149095626e-07, "loss": 0.5682, "step": 3096 }, { "epoch": 0.4996773152629881, "grad_norm": 1.795838828495171, "learning_rate": 5.418353770832988e-07, "loss": 0.4396, "step": 3097 }, { "epoch": 0.49983865763149404, "grad_norm": 1.2340113430089217, "learning_rate": 5.41569527347143e-07, "loss": 0.3661, "step": 3098 }, { "epoch": 0.5, "grad_norm": 1.7846196075441347, "learning_rate": 5.413036657767787e-07, "loss": 0.4438, "step": 3099 }, { "epoch": 0.500161342368506, "grad_norm": 1.6740007522927693, "learning_rate": 5.410377924478927e-07, "loss": 0.464, "step": 3100 }, { "epoch": 0.5003226847370119, "grad_norm": 2.0278085752729322, "learning_rate": 5.407719074361748e-07, "loss": 0.5451, "step": 3101 }, { "epoch": 0.5004840271055179, "grad_norm": 1.756767945938007, "learning_rate": 5.405060108173184e-07, "loss": 0.5714, "step": 3102 }, { "epoch": 0.5006453694740238, "grad_norm": 1.9645355771659636, "learning_rate": 5.402401026670205e-07, "loss": 0.3501, "step": 3103 }, { "epoch": 0.5008067118425299, "grad_norm": 1.544879666799569, "learning_rate": 5.399741830609808e-07, "loss": 0.622, "step": 3104 }, { "epoch": 0.5009680542110359, "grad_norm": 2.089792183213095, "learning_rate": 5.397082520749029e-07, "loss": 0.5007, "step": 3105 }, { "epoch": 0.5011293965795418, "grad_norm": 1.5190158156123472, "learning_rate": 5.394423097844927e-07, "loss": 0.5647, "step": 3106 }, { "epoch": 0.5012907389480478, "grad_norm": 1.8835293366831312, "learning_rate": 5.391763562654604e-07, "loss": 0.4832, "step": 3107 }, { "epoch": 0.5014520813165537, "grad_norm": 1.09311973941294, "learning_rate": 5.389103915935187e-07, "loss": 0.3205, "step": 3108 }, { "epoch": 0.5016134236850597, "grad_norm": 1.8312134045794806, "learning_rate": 5.386444158443837e-07, "loss": 0.5901, "step": 3109 }, { "epoch": 0.5017747660535656, "grad_norm": 1.6097261360580937, "learning_rate": 5.383784290937746e-07, "loss": 0.5657, "step": 3110 }, { "epoch": 0.5019361084220716, "grad_norm": 1.2338331940804719, "learning_rate": 5.381124314174139e-07, "loss": 0.3892, "step": 3111 }, { "epoch": 0.5020974507905777, "grad_norm": 1.7597269441373493, "learning_rate": 5.37846422891027e-07, "loss": 0.4721, "step": 3112 }, { "epoch": 0.5022587931590836, "grad_norm": 1.3664896464648273, "learning_rate": 5.375804035903424e-07, "loss": 0.4775, "step": 3113 }, { "epoch": 0.5024201355275896, "grad_norm": 1.5176033400867326, "learning_rate": 5.373143735910915e-07, "loss": 0.4844, "step": 3114 }, { "epoch": 0.5025814778960955, "grad_norm": 2.263287515733636, "learning_rate": 5.370483329690094e-07, "loss": 0.6183, "step": 3115 }, { "epoch": 0.5027428202646015, "grad_norm": 1.5802723089790958, "learning_rate": 5.367822817998337e-07, "loss": 0.4658, "step": 3116 }, { "epoch": 0.5029041626331074, "grad_norm": 1.710455142791447, "learning_rate": 5.36516220159305e-07, "loss": 0.5393, "step": 3117 }, { "epoch": 0.5030655050016134, "grad_norm": 1.522684155556364, "learning_rate": 5.362501481231669e-07, "loss": 0.4231, "step": 3118 }, { "epoch": 0.5032268473701194, "grad_norm": 1.1950741199826047, "learning_rate": 5.359840657671661e-07, "loss": 0.3541, "step": 3119 }, { "epoch": 0.5033881897386253, "grad_norm": 1.862878975938789, "learning_rate": 5.357179731670522e-07, "loss": 0.5752, "step": 3120 }, { "epoch": 0.5035495321071314, "grad_norm": 1.4943119125419304, "learning_rate": 5.354518703985778e-07, "loss": 0.3572, "step": 3121 }, { "epoch": 0.5037108744756373, "grad_norm": 1.5305927968249369, "learning_rate": 5.351857575374983e-07, "loss": 0.6031, "step": 3122 }, { "epoch": 0.5038722168441433, "grad_norm": 1.3654510369785073, "learning_rate": 5.349196346595717e-07, "loss": 0.5252, "step": 3123 }, { "epoch": 0.5040335592126493, "grad_norm": 1.686809539825116, "learning_rate": 5.346535018405592e-07, "loss": 0.4793, "step": 3124 }, { "epoch": 0.5041949015811552, "grad_norm": 1.3573326026596646, "learning_rate": 5.343873591562249e-07, "loss": 0.5102, "step": 3125 }, { "epoch": 0.5043562439496612, "grad_norm": 1.7073604868559615, "learning_rate": 5.341212066823355e-07, "loss": 0.5452, "step": 3126 }, { "epoch": 0.5045175863181671, "grad_norm": 1.471656763699407, "learning_rate": 5.338550444946604e-07, "loss": 0.4703, "step": 3127 }, { "epoch": 0.5046789286866731, "grad_norm": 1.5872809822205913, "learning_rate": 5.33588872668972e-07, "loss": 0.5591, "step": 3128 }, { "epoch": 0.504840271055179, "grad_norm": 1.8576289080612138, "learning_rate": 5.333226912810454e-07, "loss": 0.4322, "step": 3129 }, { "epoch": 0.5050016134236851, "grad_norm": 1.6024053821214324, "learning_rate": 5.330565004066584e-07, "loss": 0.5011, "step": 3130 }, { "epoch": 0.5051629557921911, "grad_norm": 1.689989161484854, "learning_rate": 5.327903001215912e-07, "loss": 0.5408, "step": 3131 }, { "epoch": 0.505324298160697, "grad_norm": 1.1753095523439225, "learning_rate": 5.325240905016272e-07, "loss": 0.5189, "step": 3132 }, { "epoch": 0.505485640529203, "grad_norm": 1.6120566830667482, "learning_rate": 5.322578716225522e-07, "loss": 0.5257, "step": 3133 }, { "epoch": 0.5056469828977089, "grad_norm": 1.780334438130552, "learning_rate": 5.319916435601546e-07, "loss": 0.5377, "step": 3134 }, { "epoch": 0.5058083252662149, "grad_norm": 3.7430928514864057, "learning_rate": 5.317254063902253e-07, "loss": 0.5399, "step": 3135 }, { "epoch": 0.5059696676347208, "grad_norm": 2.115000140323138, "learning_rate": 5.314591601885583e-07, "loss": 0.7733, "step": 3136 }, { "epoch": 0.5061310100032268, "grad_norm": 1.4520909721943458, "learning_rate": 5.311929050309493e-07, "loss": 0.3756, "step": 3137 }, { "epoch": 0.5062923523717329, "grad_norm": 1.9966457969687204, "learning_rate": 5.309266409931976e-07, "loss": 0.5687, "step": 3138 }, { "epoch": 0.5064536947402388, "grad_norm": 1.601467222775727, "learning_rate": 5.306603681511043e-07, "loss": 0.5469, "step": 3139 }, { "epoch": 0.5066150371087448, "grad_norm": 2.080136580384615, "learning_rate": 5.303940865804731e-07, "loss": 0.5852, "step": 3140 }, { "epoch": 0.5067763794772507, "grad_norm": 2.096226949366001, "learning_rate": 5.301277963571105e-07, "loss": 0.4521, "step": 3141 }, { "epoch": 0.5069377218457567, "grad_norm": 1.3657421210234733, "learning_rate": 5.298614975568248e-07, "loss": 0.4563, "step": 3142 }, { "epoch": 0.5070990642142627, "grad_norm": 1.3005944195167316, "learning_rate": 5.295951902554279e-07, "loss": 0.3862, "step": 3143 }, { "epoch": 0.5072604065827686, "grad_norm": 2.076876501908794, "learning_rate": 5.29328874528733e-07, "loss": 0.5598, "step": 3144 }, { "epoch": 0.5074217489512746, "grad_norm": 1.5332766764304464, "learning_rate": 5.290625504525561e-07, "loss": 0.3884, "step": 3145 }, { "epoch": 0.5075830913197805, "grad_norm": 1.8227825515094445, "learning_rate": 5.287962181027154e-07, "loss": 0.4433, "step": 3146 }, { "epoch": 0.5077444336882866, "grad_norm": 1.9032433985068649, "learning_rate": 5.285298775550322e-07, "loss": 0.7465, "step": 3147 }, { "epoch": 0.5079057760567925, "grad_norm": 1.6912729682124, "learning_rate": 5.282635288853291e-07, "loss": 0.5281, "step": 3148 }, { "epoch": 0.5080671184252985, "grad_norm": 1.2859762990111236, "learning_rate": 5.279971721694318e-07, "loss": 0.5836, "step": 3149 }, { "epoch": 0.5082284607938045, "grad_norm": 1.7346108078672569, "learning_rate": 5.277308074831677e-07, "loss": 0.6602, "step": 3150 }, { "epoch": 0.5083898031623104, "grad_norm": 1.2059073539657066, "learning_rate": 5.274644349023669e-07, "loss": 0.4491, "step": 3151 }, { "epoch": 0.5085511455308164, "grad_norm": 1.6937078097673588, "learning_rate": 5.271980545028618e-07, "loss": 0.5848, "step": 3152 }, { "epoch": 0.5087124878993223, "grad_norm": 1.524595163109523, "learning_rate": 5.269316663604865e-07, "loss": 0.3182, "step": 3153 }, { "epoch": 0.5088738302678283, "grad_norm": 1.695870233020281, "learning_rate": 5.266652705510777e-07, "loss": 0.5754, "step": 3154 }, { "epoch": 0.5090351726363344, "grad_norm": 1.7565015863790931, "learning_rate": 5.263988671504741e-07, "loss": 0.5195, "step": 3155 }, { "epoch": 0.5091965150048403, "grad_norm": 1.7838552730213457, "learning_rate": 5.261324562345171e-07, "loss": 0.5032, "step": 3156 }, { "epoch": 0.5093578573733463, "grad_norm": 1.4834427968186388, "learning_rate": 5.258660378790495e-07, "loss": 0.3249, "step": 3157 }, { "epoch": 0.5095191997418522, "grad_norm": 1.198950224892347, "learning_rate": 5.255996121599167e-07, "loss": 0.4152, "step": 3158 }, { "epoch": 0.5096805421103582, "grad_norm": 1.6244489028751776, "learning_rate": 5.253331791529656e-07, "loss": 0.5004, "step": 3159 }, { "epoch": 0.5098418844788641, "grad_norm": 1.2096974576295354, "learning_rate": 5.25066738934046e-07, "loss": 0.4092, "step": 3160 }, { "epoch": 0.5100032268473701, "grad_norm": 1.6415696104118267, "learning_rate": 5.248002915790093e-07, "loss": 0.4528, "step": 3161 }, { "epoch": 0.510164569215876, "grad_norm": 1.5756577193219543, "learning_rate": 5.245338371637091e-07, "loss": 0.4113, "step": 3162 }, { "epoch": 0.510325911584382, "grad_norm": 2.62120244672042, "learning_rate": 5.242673757640005e-07, "loss": 0.4953, "step": 3163 }, { "epoch": 0.5104872539528881, "grad_norm": 1.675604964367362, "learning_rate": 5.240009074557413e-07, "loss": 0.5781, "step": 3164 }, { "epoch": 0.510648596321394, "grad_norm": 1.5558898723581358, "learning_rate": 5.237344323147908e-07, "loss": 0.43, "step": 3165 }, { "epoch": 0.5108099386899, "grad_norm": 1.4275038255149235, "learning_rate": 5.234679504170108e-07, "loss": 0.6995, "step": 3166 }, { "epoch": 0.510971281058406, "grad_norm": 1.7156112447996723, "learning_rate": 5.232014618382641e-07, "loss": 0.4948, "step": 3167 }, { "epoch": 0.5111326234269119, "grad_norm": 1.8125180210664593, "learning_rate": 5.22934966654416e-07, "loss": 0.4564, "step": 3168 }, { "epoch": 0.5112939657954179, "grad_norm": 1.822030366060399, "learning_rate": 5.226684649413339e-07, "loss": 0.5988, "step": 3169 }, { "epoch": 0.5114553081639238, "grad_norm": 1.5928973740867798, "learning_rate": 5.224019567748866e-07, "loss": 0.3703, "step": 3170 }, { "epoch": 0.5116166505324298, "grad_norm": 1.7799368585681634, "learning_rate": 5.221354422309447e-07, "loss": 0.4809, "step": 3171 }, { "epoch": 0.5117779929009358, "grad_norm": 1.564288146122226, "learning_rate": 5.218689213853811e-07, "loss": 0.4973, "step": 3172 }, { "epoch": 0.5119393352694418, "grad_norm": 1.6880778983252547, "learning_rate": 5.2160239431407e-07, "loss": 0.4343, "step": 3173 }, { "epoch": 0.5121006776379478, "grad_norm": 1.461873453375107, "learning_rate": 5.213358610928877e-07, "loss": 0.4473, "step": 3174 }, { "epoch": 0.5122620200064537, "grad_norm": 1.6661860567774418, "learning_rate": 5.210693217977122e-07, "loss": 0.6832, "step": 3175 }, { "epoch": 0.5124233623749597, "grad_norm": 2.0666904073807757, "learning_rate": 5.20802776504423e-07, "loss": 0.6278, "step": 3176 }, { "epoch": 0.5125847047434656, "grad_norm": 1.5813713298583445, "learning_rate": 5.205362252889015e-07, "loss": 0.4581, "step": 3177 }, { "epoch": 0.5127460471119716, "grad_norm": 1.466250382575386, "learning_rate": 5.202696682270309e-07, "loss": 0.5027, "step": 3178 }, { "epoch": 0.5129073894804775, "grad_norm": 1.5602337997028226, "learning_rate": 5.200031053946958e-07, "loss": 0.5092, "step": 3179 }, { "epoch": 0.5130687318489835, "grad_norm": 1.5247896893676052, "learning_rate": 5.197365368677827e-07, "loss": 0.5416, "step": 3180 }, { "epoch": 0.5132300742174896, "grad_norm": 1.780483147703963, "learning_rate": 5.194699627221793e-07, "loss": 0.5428, "step": 3181 }, { "epoch": 0.5133914165859955, "grad_norm": 1.6775189262493597, "learning_rate": 5.192033830337753e-07, "loss": 0.4059, "step": 3182 }, { "epoch": 0.5135527589545015, "grad_norm": 1.6390149163247596, "learning_rate": 5.189367978784619e-07, "loss": 0.5294, "step": 3183 }, { "epoch": 0.5137141013230074, "grad_norm": 1.6288902227038626, "learning_rate": 5.186702073321318e-07, "loss": 0.4759, "step": 3184 }, { "epoch": 0.5138754436915134, "grad_norm": 2.478819387352525, "learning_rate": 5.184036114706794e-07, "loss": 0.4414, "step": 3185 }, { "epoch": 0.5140367860600193, "grad_norm": 1.3564610435721631, "learning_rate": 5.181370103700003e-07, "loss": 0.3952, "step": 3186 }, { "epoch": 0.5141981284285253, "grad_norm": 1.7053727881798832, "learning_rate": 5.178704041059917e-07, "loss": 0.5545, "step": 3187 }, { "epoch": 0.5143594707970313, "grad_norm": 1.4675417049012234, "learning_rate": 5.176037927545523e-07, "loss": 0.402, "step": 3188 }, { "epoch": 0.5145208131655372, "grad_norm": 1.676222736384412, "learning_rate": 5.173371763915827e-07, "loss": 0.4447, "step": 3189 }, { "epoch": 0.5146821555340433, "grad_norm": 1.760174533361302, "learning_rate": 5.170705550929839e-07, "loss": 0.6035, "step": 3190 }, { "epoch": 0.5148434979025492, "grad_norm": 1.6788061624251633, "learning_rate": 5.168039289346593e-07, "loss": 0.6705, "step": 3191 }, { "epoch": 0.5150048402710552, "grad_norm": 1.5881922436574611, "learning_rate": 5.165372979925132e-07, "loss": 0.5593, "step": 3192 }, { "epoch": 0.5151661826395612, "grad_norm": 1.512778844429456, "learning_rate": 5.162706623424513e-07, "loss": 0.5202, "step": 3193 }, { "epoch": 0.5153275250080671, "grad_norm": 1.3708818059929537, "learning_rate": 5.160040220603807e-07, "loss": 0.6337, "step": 3194 }, { "epoch": 0.5154888673765731, "grad_norm": 1.9819862826745505, "learning_rate": 5.157373772222098e-07, "loss": 0.616, "step": 3195 }, { "epoch": 0.515650209745079, "grad_norm": 1.8222028878912695, "learning_rate": 5.154707279038484e-07, "loss": 0.5948, "step": 3196 }, { "epoch": 0.515811552113585, "grad_norm": 1.3996940380641265, "learning_rate": 5.152040741812074e-07, "loss": 0.3917, "step": 3197 }, { "epoch": 0.515972894482091, "grad_norm": 1.2459696645151388, "learning_rate": 5.14937416130199e-07, "loss": 0.4688, "step": 3198 }, { "epoch": 0.516134236850597, "grad_norm": 1.6448524143370924, "learning_rate": 5.146707538267368e-07, "loss": 0.4837, "step": 3199 }, { "epoch": 0.516295579219103, "grad_norm": 2.0745233287000655, "learning_rate": 5.144040873467353e-07, "loss": 0.6464, "step": 3200 }, { "epoch": 0.5164569215876089, "grad_norm": 1.651636572036617, "learning_rate": 5.141374167661105e-07, "loss": 0.4963, "step": 3201 }, { "epoch": 0.5166182639561149, "grad_norm": 1.872079991498192, "learning_rate": 5.138707421607794e-07, "loss": 0.4556, "step": 3202 }, { "epoch": 0.5167796063246208, "grad_norm": 1.711172915074638, "learning_rate": 5.136040636066601e-07, "loss": 0.5908, "step": 3203 }, { "epoch": 0.5169409486931268, "grad_norm": 1.8132902593031384, "learning_rate": 5.133373811796722e-07, "loss": 0.5123, "step": 3204 }, { "epoch": 0.5171022910616327, "grad_norm": 1.664884480658823, "learning_rate": 5.130706949557358e-07, "loss": 0.5179, "step": 3205 }, { "epoch": 0.5172636334301387, "grad_norm": 2.135369413519835, "learning_rate": 5.128040050107724e-07, "loss": 0.7556, "step": 3206 }, { "epoch": 0.5174249757986448, "grad_norm": 1.9157704732150145, "learning_rate": 5.125373114207046e-07, "loss": 0.525, "step": 3207 }, { "epoch": 0.5175863181671507, "grad_norm": 1.3572452128324686, "learning_rate": 5.122706142614562e-07, "loss": 0.3186, "step": 3208 }, { "epoch": 0.5177476605356567, "grad_norm": 1.4247927230531328, "learning_rate": 5.120039136089515e-07, "loss": 0.4821, "step": 3209 }, { "epoch": 0.5179090029041626, "grad_norm": 1.747601432094044, "learning_rate": 5.117372095391162e-07, "loss": 0.4088, "step": 3210 }, { "epoch": 0.5180703452726686, "grad_norm": 1.853078229806084, "learning_rate": 5.114705021278769e-07, "loss": 0.6048, "step": 3211 }, { "epoch": 0.5182316876411746, "grad_norm": 1.5026979819309307, "learning_rate": 5.112037914511613e-07, "loss": 0.4525, "step": 3212 }, { "epoch": 0.5183930300096805, "grad_norm": 1.790998985021869, "learning_rate": 5.109370775848972e-07, "loss": 0.5407, "step": 3213 }, { "epoch": 0.5185543723781865, "grad_norm": 1.5502949680105218, "learning_rate": 5.106703606050148e-07, "loss": 0.4168, "step": 3214 }, { "epoch": 0.5187157147466925, "grad_norm": 1.4851978631287523, "learning_rate": 5.104036405874439e-07, "loss": 0.4382, "step": 3215 }, { "epoch": 0.5188770571151985, "grad_norm": 1.4902548532341342, "learning_rate": 5.101369176081155e-07, "loss": 0.5656, "step": 3216 }, { "epoch": 0.5190383994837044, "grad_norm": 1.6569783030984706, "learning_rate": 5.098701917429618e-07, "loss": 0.4284, "step": 3217 }, { "epoch": 0.5191997418522104, "grad_norm": 1.308879900718668, "learning_rate": 5.096034630679156e-07, "loss": 0.4003, "step": 3218 }, { "epoch": 0.5193610842207164, "grad_norm": 1.3922132359104997, "learning_rate": 5.093367316589102e-07, "loss": 0.4825, "step": 3219 }, { "epoch": 0.5195224265892223, "grad_norm": 1.7596980853567683, "learning_rate": 5.090699975918803e-07, "loss": 0.5058, "step": 3220 }, { "epoch": 0.5196837689577283, "grad_norm": 1.7567504387026167, "learning_rate": 5.088032609427606e-07, "loss": 0.5293, "step": 3221 }, { "epoch": 0.5198451113262342, "grad_norm": 1.2674698746692048, "learning_rate": 5.085365217874874e-07, "loss": 0.4873, "step": 3222 }, { "epoch": 0.5200064536947402, "grad_norm": 1.2541520777938453, "learning_rate": 5.082697802019969e-07, "loss": 0.4475, "step": 3223 }, { "epoch": 0.5201677960632463, "grad_norm": 1.2906952661039852, "learning_rate": 5.080030362622265e-07, "loss": 0.4965, "step": 3224 }, { "epoch": 0.5203291384317522, "grad_norm": 2.000826188148452, "learning_rate": 5.077362900441141e-07, "loss": 0.6152, "step": 3225 }, { "epoch": 0.5204904808002582, "grad_norm": 1.989791206831372, "learning_rate": 5.074695416235982e-07, "loss": 0.7, "step": 3226 }, { "epoch": 0.5206518231687641, "grad_norm": 1.3729286197186397, "learning_rate": 5.072027910766181e-07, "loss": 0.4257, "step": 3227 }, { "epoch": 0.5208131655372701, "grad_norm": 2.0207728216281167, "learning_rate": 5.069360384791137e-07, "loss": 0.4922, "step": 3228 }, { "epoch": 0.520974507905776, "grad_norm": 1.214162261281152, "learning_rate": 5.06669283907025e-07, "loss": 0.4059, "step": 3229 }, { "epoch": 0.521135850274282, "grad_norm": 1.7215721542522557, "learning_rate": 5.064025274362932e-07, "loss": 0.5571, "step": 3230 }, { "epoch": 0.521297192642788, "grad_norm": 1.3956017325785524, "learning_rate": 5.061357691428599e-07, "loss": 0.4365, "step": 3231 }, { "epoch": 0.5214585350112939, "grad_norm": 1.2535998008569633, "learning_rate": 5.058690091026671e-07, "loss": 0.4804, "step": 3232 }, { "epoch": 0.5216198773798, "grad_norm": 1.6438892407058994, "learning_rate": 5.05602247391657e-07, "loss": 0.4084, "step": 3233 }, { "epoch": 0.5217812197483059, "grad_norm": 0.9151775699038458, "learning_rate": 5.053354840857728e-07, "loss": 0.3061, "step": 3234 }, { "epoch": 0.5219425621168119, "grad_norm": 2.3138372318057567, "learning_rate": 5.050687192609578e-07, "loss": 0.8175, "step": 3235 }, { "epoch": 0.5221039044853178, "grad_norm": 1.0953710260141756, "learning_rate": 5.048019529931561e-07, "loss": 0.4472, "step": 3236 }, { "epoch": 0.5222652468538238, "grad_norm": 1.5629988064898659, "learning_rate": 5.045351853583118e-07, "loss": 0.5092, "step": 3237 }, { "epoch": 0.5224265892223298, "grad_norm": 2.011842238700741, "learning_rate": 5.042684164323697e-07, "loss": 0.5491, "step": 3238 }, { "epoch": 0.5225879315908357, "grad_norm": 2.00953191022934, "learning_rate": 5.040016462912745e-07, "loss": 0.6135, "step": 3239 }, { "epoch": 0.5227492739593417, "grad_norm": 1.5667451791501963, "learning_rate": 5.037348750109719e-07, "loss": 0.4762, "step": 3240 }, { "epoch": 0.5229106163278477, "grad_norm": 1.0653772664657524, "learning_rate": 5.034681026674076e-07, "loss": 0.4675, "step": 3241 }, { "epoch": 0.5230719586963537, "grad_norm": 1.34457983963194, "learning_rate": 5.032013293365276e-07, "loss": 0.4383, "step": 3242 }, { "epoch": 0.5232333010648597, "grad_norm": 1.2443765509388338, "learning_rate": 5.029345550942778e-07, "loss": 0.2459, "step": 3243 }, { "epoch": 0.5233946434333656, "grad_norm": 1.014855606914322, "learning_rate": 5.026677800166053e-07, "loss": 0.3461, "step": 3244 }, { "epoch": 0.5235559858018716, "grad_norm": 1.5655689617795585, "learning_rate": 5.024010041794566e-07, "loss": 0.3575, "step": 3245 }, { "epoch": 0.5237173281703775, "grad_norm": 1.8594982202556778, "learning_rate": 5.021342276587787e-07, "loss": 0.4224, "step": 3246 }, { "epoch": 0.5238786705388835, "grad_norm": 1.4097774345514251, "learning_rate": 5.01867450530519e-07, "loss": 0.4411, "step": 3247 }, { "epoch": 0.5240400129073894, "grad_norm": 1.558935983509439, "learning_rate": 5.016006728706244e-07, "loss": 0.4887, "step": 3248 }, { "epoch": 0.5242013552758954, "grad_norm": 1.614454235535246, "learning_rate": 5.013338947550428e-07, "loss": 0.4784, "step": 3249 }, { "epoch": 0.5243626976444015, "grad_norm": 1.9149999697214628, "learning_rate": 5.01067116259722e-07, "loss": 0.5364, "step": 3250 }, { "epoch": 0.5245240400129074, "grad_norm": 1.423861279513682, "learning_rate": 5.008003374606093e-07, "loss": 0.4417, "step": 3251 }, { "epoch": 0.5246853823814134, "grad_norm": 2.0821937368804213, "learning_rate": 5.005335584336529e-07, "loss": 0.5978, "step": 3252 }, { "epoch": 0.5248467247499193, "grad_norm": 1.4628681339017056, "learning_rate": 5.002667792548003e-07, "loss": 0.4149, "step": 3253 }, { "epoch": 0.5250080671184253, "grad_norm": 5.578144319217683, "learning_rate": 5e-07, "loss": 0.511, "step": 3254 }, { "epoch": 0.5251694094869312, "grad_norm": 1.5558173899692682, "learning_rate": 4.997332207451997e-07, "loss": 0.5461, "step": 3255 }, { "epoch": 0.5253307518554372, "grad_norm": 1.5197147366473358, "learning_rate": 4.994664415663472e-07, "loss": 0.5245, "step": 3256 }, { "epoch": 0.5254920942239432, "grad_norm": 1.5244734933962845, "learning_rate": 4.991996625393908e-07, "loss": 0.4986, "step": 3257 }, { "epoch": 0.5256534365924492, "grad_norm": 1.5653240622750082, "learning_rate": 4.989328837402782e-07, "loss": 0.5802, "step": 3258 }, { "epoch": 0.5258147789609552, "grad_norm": 1.3316953800544942, "learning_rate": 4.986661052449572e-07, "loss": 0.3653, "step": 3259 }, { "epoch": 0.5259761213294611, "grad_norm": 2.3407479960538837, "learning_rate": 4.983993271293756e-07, "loss": 0.6501, "step": 3260 }, { "epoch": 0.5261374636979671, "grad_norm": 1.7401901222365161, "learning_rate": 4.981325494694811e-07, "loss": 0.4815, "step": 3261 }, { "epoch": 0.5262988060664731, "grad_norm": 1.2899008596752048, "learning_rate": 4.978657723412211e-07, "loss": 0.4565, "step": 3262 }, { "epoch": 0.526460148434979, "grad_norm": 1.2239921998012304, "learning_rate": 4.975989958205433e-07, "loss": 0.2794, "step": 3263 }, { "epoch": 0.526621490803485, "grad_norm": 1.6096362300077278, "learning_rate": 4.973322199833947e-07, "loss": 0.3666, "step": 3264 }, { "epoch": 0.5267828331719909, "grad_norm": 1.4916702892039264, "learning_rate": 4.970654449057221e-07, "loss": 0.2611, "step": 3265 }, { "epoch": 0.5269441755404969, "grad_norm": 1.4053544053642906, "learning_rate": 4.967986706634724e-07, "loss": 0.4988, "step": 3266 }, { "epoch": 0.527105517909003, "grad_norm": 1.4764986377610312, "learning_rate": 4.965318973325924e-07, "loss": 0.3688, "step": 3267 }, { "epoch": 0.5272668602775089, "grad_norm": 1.9123584869487678, "learning_rate": 4.962651249890281e-07, "loss": 0.5366, "step": 3268 }, { "epoch": 0.5274282026460149, "grad_norm": 1.612557458962756, "learning_rate": 4.959983537087254e-07, "loss": 0.4042, "step": 3269 }, { "epoch": 0.5275895450145208, "grad_norm": 1.4225746361807385, "learning_rate": 4.957315835676304e-07, "loss": 0.5979, "step": 3270 }, { "epoch": 0.5277508873830268, "grad_norm": 1.417116692066361, "learning_rate": 4.954648146416882e-07, "loss": 0.4234, "step": 3271 }, { "epoch": 0.5279122297515327, "grad_norm": 1.4752244487619006, "learning_rate": 4.951980470068438e-07, "loss": 0.4629, "step": 3272 }, { "epoch": 0.5280735721200387, "grad_norm": 1.5312603541432857, "learning_rate": 4.949312807390422e-07, "loss": 0.4595, "step": 3273 }, { "epoch": 0.5282349144885446, "grad_norm": 1.4815605385314148, "learning_rate": 4.946645159142273e-07, "loss": 0.555, "step": 3274 }, { "epoch": 0.5283962568570507, "grad_norm": 1.8022058772351843, "learning_rate": 4.94397752608343e-07, "loss": 0.4592, "step": 3275 }, { "epoch": 0.5285575992255567, "grad_norm": 1.6602228846921334, "learning_rate": 4.94130990897333e-07, "loss": 0.5398, "step": 3276 }, { "epoch": 0.5287189415940626, "grad_norm": 1.903064880924852, "learning_rate": 4.938642308571401e-07, "loss": 0.5812, "step": 3277 }, { "epoch": 0.5288802839625686, "grad_norm": 1.6094580323364052, "learning_rate": 4.935974725637067e-07, "loss": 0.4049, "step": 3278 }, { "epoch": 0.5290416263310745, "grad_norm": 1.9464356271058436, "learning_rate": 4.93330716092975e-07, "loss": 0.5174, "step": 3279 }, { "epoch": 0.5292029686995805, "grad_norm": 1.8607259378677672, "learning_rate": 4.930639615208864e-07, "loss": 0.5362, "step": 3280 }, { "epoch": 0.5293643110680865, "grad_norm": 1.6427562857405873, "learning_rate": 4.92797208923382e-07, "loss": 0.435, "step": 3281 }, { "epoch": 0.5295256534365924, "grad_norm": 1.6536790579113914, "learning_rate": 4.925304583764019e-07, "loss": 0.4574, "step": 3282 }, { "epoch": 0.5296869958050984, "grad_norm": 1.4934813796607684, "learning_rate": 4.922637099558859e-07, "loss": 0.4878, "step": 3283 }, { "epoch": 0.5298483381736044, "grad_norm": 1.472469490007204, "learning_rate": 4.919969637377734e-07, "loss": 0.4313, "step": 3284 }, { "epoch": 0.5300096805421104, "grad_norm": 1.5353121690460538, "learning_rate": 4.917302197980031e-07, "loss": 0.3846, "step": 3285 }, { "epoch": 0.5301710229106164, "grad_norm": 2.435924901434206, "learning_rate": 4.914634782125126e-07, "loss": 0.6703, "step": 3286 }, { "epoch": 0.5303323652791223, "grad_norm": 1.4088841245313635, "learning_rate": 4.911967390572394e-07, "loss": 0.4808, "step": 3287 }, { "epoch": 0.5304937076476283, "grad_norm": 2.0257443272778257, "learning_rate": 4.909300024081198e-07, "loss": 0.3688, "step": 3288 }, { "epoch": 0.5306550500161342, "grad_norm": 1.3753415897190595, "learning_rate": 4.906632683410898e-07, "loss": 0.3725, "step": 3289 }, { "epoch": 0.5308163923846402, "grad_norm": 1.4144670108091453, "learning_rate": 4.903965369320845e-07, "loss": 0.2509, "step": 3290 }, { "epoch": 0.5309777347531461, "grad_norm": 2.0063411798880932, "learning_rate": 4.901298082570383e-07, "loss": 0.537, "step": 3291 }, { "epoch": 0.5311390771216521, "grad_norm": 1.5277438603113795, "learning_rate": 4.898630823918845e-07, "loss": 0.4965, "step": 3292 }, { "epoch": 0.5313004194901582, "grad_norm": 1.2493037669039613, "learning_rate": 4.895963594125563e-07, "loss": 0.3148, "step": 3293 }, { "epoch": 0.5314617618586641, "grad_norm": 1.6788028250284446, "learning_rate": 4.893296393949853e-07, "loss": 0.4433, "step": 3294 }, { "epoch": 0.5316231042271701, "grad_norm": 1.5228252256453918, "learning_rate": 4.890629224151027e-07, "loss": 0.4602, "step": 3295 }, { "epoch": 0.531784446595676, "grad_norm": 1.858091985599764, "learning_rate": 4.887962085488388e-07, "loss": 0.4419, "step": 3296 }, { "epoch": 0.531945788964182, "grad_norm": 1.5021801840165763, "learning_rate": 4.88529497872123e-07, "loss": 0.5116, "step": 3297 }, { "epoch": 0.5321071313326879, "grad_norm": 1.756472267394535, "learning_rate": 4.882627904608837e-07, "loss": 0.6168, "step": 3298 }, { "epoch": 0.5322684737011939, "grad_norm": 1.416419157210306, "learning_rate": 4.879960863910486e-07, "loss": 0.4405, "step": 3299 }, { "epoch": 0.5324298160696999, "grad_norm": 1.8071112272422862, "learning_rate": 4.877293857385439e-07, "loss": 0.5436, "step": 3300 }, { "epoch": 0.5325911584382059, "grad_norm": 2.2185968695438025, "learning_rate": 4.874626885792953e-07, "loss": 0.6757, "step": 3301 }, { "epoch": 0.5327525008067119, "grad_norm": 1.6292621763417368, "learning_rate": 4.871959949892277e-07, "loss": 0.5038, "step": 3302 }, { "epoch": 0.5329138431752178, "grad_norm": 1.722246186113613, "learning_rate": 4.869293050442644e-07, "loss": 0.665, "step": 3303 }, { "epoch": 0.5330751855437238, "grad_norm": 1.7592654856447087, "learning_rate": 4.86662618820328e-07, "loss": 0.6068, "step": 3304 }, { "epoch": 0.5332365279122298, "grad_norm": 1.2125602923474856, "learning_rate": 4.863959363933399e-07, "loss": 0.4076, "step": 3305 }, { "epoch": 0.5333978702807357, "grad_norm": 1.7533962808243442, "learning_rate": 4.861292578392205e-07, "loss": 0.5176, "step": 3306 }, { "epoch": 0.5335592126492417, "grad_norm": 1.667940376272683, "learning_rate": 4.858625832338895e-07, "loss": 0.4177, "step": 3307 }, { "epoch": 0.5337205550177476, "grad_norm": 1.72604749084239, "learning_rate": 4.855959126532647e-07, "loss": 0.5465, "step": 3308 }, { "epoch": 0.5338818973862536, "grad_norm": 1.8945905223894888, "learning_rate": 4.853292461732633e-07, "loss": 0.4251, "step": 3309 }, { "epoch": 0.5340432397547596, "grad_norm": 1.805122356124663, "learning_rate": 4.85062583869801e-07, "loss": 0.4165, "step": 3310 }, { "epoch": 0.5342045821232656, "grad_norm": 1.4990742528795438, "learning_rate": 4.847959258187926e-07, "loss": 0.5026, "step": 3311 }, { "epoch": 0.5343659244917716, "grad_norm": 1.8904773875218195, "learning_rate": 4.845292720961517e-07, "loss": 0.5251, "step": 3312 }, { "epoch": 0.5345272668602775, "grad_norm": 1.6461617186445152, "learning_rate": 4.842626227777903e-07, "loss": 0.4725, "step": 3313 }, { "epoch": 0.5346886092287835, "grad_norm": 1.8390919309479625, "learning_rate": 4.839959779396193e-07, "loss": 0.5001, "step": 3314 }, { "epoch": 0.5348499515972894, "grad_norm": 2.0479903349201307, "learning_rate": 4.837293376575487e-07, "loss": 0.6084, "step": 3315 }, { "epoch": 0.5350112939657954, "grad_norm": 1.2959905618953833, "learning_rate": 4.83462702007487e-07, "loss": 0.6181, "step": 3316 }, { "epoch": 0.5351726363343013, "grad_norm": 2.110570272245033, "learning_rate": 4.831960710653408e-07, "loss": 0.4745, "step": 3317 }, { "epoch": 0.5353339787028074, "grad_norm": 1.8149590746676647, "learning_rate": 4.829294449070161e-07, "loss": 0.4075, "step": 3318 }, { "epoch": 0.5354953210713134, "grad_norm": 1.062081871784971, "learning_rate": 4.826628236084173e-07, "loss": 0.4012, "step": 3319 }, { "epoch": 0.5356566634398193, "grad_norm": 1.9706080917567037, "learning_rate": 4.823962072454475e-07, "loss": 0.6213, "step": 3320 }, { "epoch": 0.5358180058083253, "grad_norm": 1.5730475146034444, "learning_rate": 4.821295958940083e-07, "loss": 0.4886, "step": 3321 }, { "epoch": 0.5359793481768312, "grad_norm": 1.872161815239809, "learning_rate": 4.818629896299998e-07, "loss": 0.5822, "step": 3322 }, { "epoch": 0.5361406905453372, "grad_norm": 1.7647481445786946, "learning_rate": 4.815963885293205e-07, "loss": 0.501, "step": 3323 }, { "epoch": 0.5363020329138432, "grad_norm": 1.8224574209924813, "learning_rate": 4.813297926678681e-07, "loss": 0.5945, "step": 3324 }, { "epoch": 0.5364633752823491, "grad_norm": 1.6183505091883557, "learning_rate": 4.810632021215381e-07, "loss": 0.4029, "step": 3325 }, { "epoch": 0.5366247176508551, "grad_norm": 1.8856613323591844, "learning_rate": 4.807966169662248e-07, "loss": 0.5021, "step": 3326 }, { "epoch": 0.5367860600193611, "grad_norm": 2.065674621632375, "learning_rate": 4.805300372778208e-07, "loss": 0.4688, "step": 3327 }, { "epoch": 0.5369474023878671, "grad_norm": 1.7990097633465842, "learning_rate": 4.802634631322174e-07, "loss": 0.468, "step": 3328 }, { "epoch": 0.537108744756373, "grad_norm": 1.8673411669205355, "learning_rate": 4.799968946053041e-07, "loss": 0.4739, "step": 3329 }, { "epoch": 0.537270087124879, "grad_norm": 1.3508232167543865, "learning_rate": 4.79730331772969e-07, "loss": 0.4327, "step": 3330 }, { "epoch": 0.537431429493385, "grad_norm": 1.9088414821923227, "learning_rate": 4.794637747110984e-07, "loss": 0.5551, "step": 3331 }, { "epoch": 0.5375927718618909, "grad_norm": 1.9518562163060809, "learning_rate": 4.79197223495577e-07, "loss": 0.5075, "step": 3332 }, { "epoch": 0.5377541142303969, "grad_norm": 1.9252782323332809, "learning_rate": 4.789306782022878e-07, "loss": 0.6845, "step": 3333 }, { "epoch": 0.5379154565989028, "grad_norm": 1.2388838496103327, "learning_rate": 4.786641389071123e-07, "loss": 0.4871, "step": 3334 }, { "epoch": 0.5380767989674088, "grad_norm": 1.5404291426951617, "learning_rate": 4.783976056859301e-07, "loss": 0.4876, "step": 3335 }, { "epoch": 0.5382381413359149, "grad_norm": 1.5047132036305302, "learning_rate": 4.78131078614619e-07, "loss": 0.6499, "step": 3336 }, { "epoch": 0.5383994837044208, "grad_norm": 1.192586745459959, "learning_rate": 4.778645577690554e-07, "loss": 0.4113, "step": 3337 }, { "epoch": 0.5385608260729268, "grad_norm": 1.4312512127067392, "learning_rate": 4.775980432251137e-07, "loss": 0.3705, "step": 3338 }, { "epoch": 0.5387221684414327, "grad_norm": 1.5469750266053663, "learning_rate": 4.773315350586663e-07, "loss": 0.5451, "step": 3339 }, { "epoch": 0.5388835108099387, "grad_norm": 1.1823469084663092, "learning_rate": 4.77065033345584e-07, "loss": 0.5785, "step": 3340 }, { "epoch": 0.5390448531784446, "grad_norm": 1.4416153918870354, "learning_rate": 4.76798538161736e-07, "loss": 0.4917, "step": 3341 }, { "epoch": 0.5392061955469506, "grad_norm": 1.8128040979404711, "learning_rate": 4.765320495829892e-07, "loss": 0.5407, "step": 3342 }, { "epoch": 0.5393675379154566, "grad_norm": 1.7325038955006147, "learning_rate": 4.7626556768520897e-07, "loss": 0.532, "step": 3343 }, { "epoch": 0.5395288802839626, "grad_norm": 1.517170069365548, "learning_rate": 4.759990925442587e-07, "loss": 0.403, "step": 3344 }, { "epoch": 0.5396902226524686, "grad_norm": 1.732022588877688, "learning_rate": 4.757326242359995e-07, "loss": 0.6333, "step": 3345 }, { "epoch": 0.5398515650209745, "grad_norm": 1.6970799808795327, "learning_rate": 4.75466162836291e-07, "loss": 0.4836, "step": 3346 }, { "epoch": 0.5400129073894805, "grad_norm": 1.4119913940950901, "learning_rate": 4.7519970842099064e-07, "loss": 0.4755, "step": 3347 }, { "epoch": 0.5401742497579864, "grad_norm": 1.436673880660022, "learning_rate": 4.7493326106595406e-07, "loss": 0.4007, "step": 3348 }, { "epoch": 0.5403355921264924, "grad_norm": 1.5004956698026275, "learning_rate": 4.7466682084703444e-07, "loss": 0.4076, "step": 3349 }, { "epoch": 0.5404969344949984, "grad_norm": 1.4282825842128022, "learning_rate": 4.744003878400835e-07, "loss": 0.5816, "step": 3350 }, { "epoch": 0.5406582768635043, "grad_norm": 1.7374019842151207, "learning_rate": 4.741339621209506e-07, "loss": 0.6188, "step": 3351 }, { "epoch": 0.5408196192320103, "grad_norm": 1.70012666847445, "learning_rate": 4.7386754376548276e-07, "loss": 0.5183, "step": 3352 }, { "epoch": 0.5409809616005163, "grad_norm": 2.0997706151750335, "learning_rate": 4.7360113284952575e-07, "loss": 0.6496, "step": 3353 }, { "epoch": 0.5411423039690223, "grad_norm": 1.2548603931606266, "learning_rate": 4.733347294489224e-07, "loss": 0.3835, "step": 3354 }, { "epoch": 0.5413036463375283, "grad_norm": 1.9106203229283523, "learning_rate": 4.7306833363951356e-07, "loss": 0.6167, "step": 3355 }, { "epoch": 0.5414649887060342, "grad_norm": 1.6319903626531391, "learning_rate": 4.7280194549713826e-07, "loss": 0.4925, "step": 3356 }, { "epoch": 0.5416263310745402, "grad_norm": 1.5036497219323022, "learning_rate": 4.72535565097633e-07, "loss": 0.412, "step": 3357 }, { "epoch": 0.5417876734430461, "grad_norm": 1.543958052233374, "learning_rate": 4.7226919251683236e-07, "loss": 0.6286, "step": 3358 }, { "epoch": 0.5419490158115521, "grad_norm": 1.5946426883466134, "learning_rate": 4.720028278305683e-07, "loss": 0.4475, "step": 3359 }, { "epoch": 0.542110358180058, "grad_norm": 1.9950545082198714, "learning_rate": 4.717364711146709e-07, "loss": 0.6657, "step": 3360 }, { "epoch": 0.5422717005485641, "grad_norm": 1.779441048803526, "learning_rate": 4.7147012244496795e-07, "loss": 0.5443, "step": 3361 }, { "epoch": 0.5424330429170701, "grad_norm": 1.5950340633282696, "learning_rate": 4.7120378189728454e-07, "loss": 0.5086, "step": 3362 }, { "epoch": 0.542594385285576, "grad_norm": 1.5873582611487114, "learning_rate": 4.709374495474441e-07, "loss": 0.5351, "step": 3363 }, { "epoch": 0.542755727654082, "grad_norm": 1.582240488791362, "learning_rate": 4.7067112547126697e-07, "loss": 0.5684, "step": 3364 }, { "epoch": 0.5429170700225879, "grad_norm": 1.5759298334912917, "learning_rate": 4.7040480974457203e-07, "loss": 0.5137, "step": 3365 }, { "epoch": 0.5430784123910939, "grad_norm": 1.6002941785023193, "learning_rate": 4.70138502443175e-07, "loss": 0.5172, "step": 3366 }, { "epoch": 0.5432397547595998, "grad_norm": 1.548978665938217, "learning_rate": 4.698722036428896e-07, "loss": 0.5698, "step": 3367 }, { "epoch": 0.5434010971281058, "grad_norm": 1.638298055683578, "learning_rate": 4.696059134195268e-07, "loss": 0.4514, "step": 3368 }, { "epoch": 0.5435624394966118, "grad_norm": 1.5664584562586799, "learning_rate": 4.693396318488957e-07, "loss": 0.5329, "step": 3369 }, { "epoch": 0.5437237818651178, "grad_norm": 1.5940114723089476, "learning_rate": 4.690733590068024e-07, "loss": 0.4927, "step": 3370 }, { "epoch": 0.5438851242336238, "grad_norm": 1.5082377886499916, "learning_rate": 4.6880709496905076e-07, "loss": 0.4021, "step": 3371 }, { "epoch": 0.5440464666021297, "grad_norm": 1.624812115297926, "learning_rate": 4.685408398114418e-07, "loss": 0.4619, "step": 3372 }, { "epoch": 0.5442078089706357, "grad_norm": 1.5624505607412806, "learning_rate": 4.682745936097747e-07, "loss": 0.3828, "step": 3373 }, { "epoch": 0.5443691513391417, "grad_norm": 1.1258291262122853, "learning_rate": 4.680083564398456e-07, "loss": 0.4555, "step": 3374 }, { "epoch": 0.5445304937076476, "grad_norm": 1.826284541770133, "learning_rate": 4.6774212837744775e-07, "loss": 0.6292, "step": 3375 }, { "epoch": 0.5446918360761536, "grad_norm": 1.4904529016341799, "learning_rate": 4.6747590949837273e-07, "loss": 0.4583, "step": 3376 }, { "epoch": 0.5448531784446595, "grad_norm": 1.5679538910470112, "learning_rate": 4.6720969987840867e-07, "loss": 0.3512, "step": 3377 }, { "epoch": 0.5450145208131656, "grad_norm": 1.466189323399117, "learning_rate": 4.669434995933416e-07, "loss": 0.479, "step": 3378 }, { "epoch": 0.5451758631816715, "grad_norm": 1.2842293037141488, "learning_rate": 4.6667730871895455e-07, "loss": 0.4046, "step": 3379 }, { "epoch": 0.5453372055501775, "grad_norm": 1.4292569371993087, "learning_rate": 4.66411127331028e-07, "loss": 0.4751, "step": 3380 }, { "epoch": 0.5454985479186835, "grad_norm": 1.5486233781557752, "learning_rate": 4.6614495550533953e-07, "loss": 0.4909, "step": 3381 }, { "epoch": 0.5456598902871894, "grad_norm": 1.4285507268768072, "learning_rate": 4.6587879331766457e-07, "loss": 0.6084, "step": 3382 }, { "epoch": 0.5458212326556954, "grad_norm": 1.6597915787439652, "learning_rate": 4.656126408437751e-07, "loss": 0.4143, "step": 3383 }, { "epoch": 0.5459825750242013, "grad_norm": 1.5313361785927562, "learning_rate": 4.6534649815944085e-07, "loss": 0.4819, "step": 3384 }, { "epoch": 0.5461439173927073, "grad_norm": 1.4506285357483553, "learning_rate": 4.650803653404284e-07, "loss": 0.5021, "step": 3385 }, { "epoch": 0.5463052597612132, "grad_norm": 1.5355151196216434, "learning_rate": 4.6481424246250183e-07, "loss": 0.2777, "step": 3386 }, { "epoch": 0.5464666021297193, "grad_norm": 1.7538339306846866, "learning_rate": 4.6454812960142204e-07, "loss": 0.5488, "step": 3387 }, { "epoch": 0.5466279444982253, "grad_norm": 1.7405297299514095, "learning_rate": 4.6428202683294765e-07, "loss": 0.5583, "step": 3388 }, { "epoch": 0.5467892868667312, "grad_norm": 1.334759421441608, "learning_rate": 4.6401593423283386e-07, "loss": 0.571, "step": 3389 }, { "epoch": 0.5469506292352372, "grad_norm": 1.914084298145966, "learning_rate": 4.637498518768331e-07, "loss": 0.6081, "step": 3390 }, { "epoch": 0.5471119716037431, "grad_norm": 1.8478899882504227, "learning_rate": 4.6348377984069504e-07, "loss": 0.5343, "step": 3391 }, { "epoch": 0.5472733139722491, "grad_norm": 1.7565380986895294, "learning_rate": 4.632177182001663e-07, "loss": 0.6067, "step": 3392 }, { "epoch": 0.547434656340755, "grad_norm": 1.3642680359595911, "learning_rate": 4.6295166703099056e-07, "loss": 0.5911, "step": 3393 }, { "epoch": 0.547595998709261, "grad_norm": 1.5680633683634628, "learning_rate": 4.626856264089084e-07, "loss": 0.425, "step": 3394 }, { "epoch": 0.547757341077767, "grad_norm": 1.8685800634578777, "learning_rate": 4.624195964096578e-07, "loss": 0.5851, "step": 3395 }, { "epoch": 0.547918683446273, "grad_norm": 1.9908897211906915, "learning_rate": 4.621535771089731e-07, "loss": 0.6464, "step": 3396 }, { "epoch": 0.548080025814779, "grad_norm": 1.5270333913813263, "learning_rate": 4.618875685825862e-07, "loss": 0.5557, "step": 3397 }, { "epoch": 0.548241368183285, "grad_norm": 2.15107382195356, "learning_rate": 4.6162157090622526e-07, "loss": 0.6607, "step": 3398 }, { "epoch": 0.5484027105517909, "grad_norm": 1.487485818234326, "learning_rate": 4.6135558415561617e-07, "loss": 0.3684, "step": 3399 }, { "epoch": 0.5485640529202969, "grad_norm": 2.8282394807522646, "learning_rate": 4.610896084064813e-07, "loss": 0.6268, "step": 3400 }, { "epoch": 0.5487253952888028, "grad_norm": 1.4040311898926845, "learning_rate": 4.6082364373453956e-07, "loss": 0.486, "step": 3401 }, { "epoch": 0.5488867376573088, "grad_norm": 1.6504070935753778, "learning_rate": 4.605576902155073e-07, "loss": 0.5974, "step": 3402 }, { "epoch": 0.5490480800258147, "grad_norm": 1.8909474247684745, "learning_rate": 4.602917479250972e-07, "loss": 0.4446, "step": 3403 }, { "epoch": 0.5492094223943208, "grad_norm": 1.564400699418101, "learning_rate": 4.6002581693901907e-07, "loss": 0.6152, "step": 3404 }, { "epoch": 0.5493707647628268, "grad_norm": 1.5123308701966336, "learning_rate": 4.5975989733297954e-07, "loss": 0.5711, "step": 3405 }, { "epoch": 0.5495321071313327, "grad_norm": 1.793158494138574, "learning_rate": 4.594939891826816e-07, "loss": 0.5806, "step": 3406 }, { "epoch": 0.5496934494998387, "grad_norm": 1.4317874182661847, "learning_rate": 4.592280925638253e-07, "loss": 0.3863, "step": 3407 }, { "epoch": 0.5498547918683446, "grad_norm": 1.9084159552777704, "learning_rate": 4.589622075521074e-07, "loss": 0.6009, "step": 3408 }, { "epoch": 0.5500161342368506, "grad_norm": 2.0525485316682563, "learning_rate": 4.5869633422322133e-07, "loss": 0.595, "step": 3409 }, { "epoch": 0.5501774766053565, "grad_norm": 1.6604832226262605, "learning_rate": 4.584304726528569e-07, "loss": 0.5246, "step": 3410 }, { "epoch": 0.5503388189738625, "grad_norm": 1.5800467394905156, "learning_rate": 4.581646229167012e-07, "loss": 0.4226, "step": 3411 }, { "epoch": 0.5505001613423685, "grad_norm": 1.6252609190175786, "learning_rate": 4.578987850904373e-07, "loss": 0.6334, "step": 3412 }, { "epoch": 0.5506615037108745, "grad_norm": 1.7174888145160279, "learning_rate": 4.5763295924974543e-07, "loss": 0.5437, "step": 3413 }, { "epoch": 0.5508228460793805, "grad_norm": 1.876814790484661, "learning_rate": 4.5736714547030196e-07, "loss": 0.5087, "step": 3414 }, { "epoch": 0.5509841884478864, "grad_norm": 1.905641364818355, "learning_rate": 4.571013438277801e-07, "loss": 0.5701, "step": 3415 }, { "epoch": 0.5511455308163924, "grad_norm": 1.4621642159264936, "learning_rate": 4.5683555439784934e-07, "loss": 0.3816, "step": 3416 }, { "epoch": 0.5513068731848983, "grad_norm": 1.3117695092563837, "learning_rate": 4.56569777256176e-07, "loss": 0.4068, "step": 3417 }, { "epoch": 0.5514682155534043, "grad_norm": 1.66020960104992, "learning_rate": 4.563040124784228e-07, "loss": 0.6217, "step": 3418 }, { "epoch": 0.5516295579219103, "grad_norm": 2.1472304403924487, "learning_rate": 4.5603826014024884e-07, "loss": 0.6029, "step": 3419 }, { "epoch": 0.5517909002904162, "grad_norm": 1.739333068144839, "learning_rate": 4.557725203173096e-07, "loss": 0.5919, "step": 3420 }, { "epoch": 0.5519522426589223, "grad_norm": 1.4654396574364303, "learning_rate": 4.5550679308525735e-07, "loss": 0.6005, "step": 3421 }, { "epoch": 0.5521135850274282, "grad_norm": 1.860445243499211, "learning_rate": 4.552410785197405e-07, "loss": 0.6632, "step": 3422 }, { "epoch": 0.5522749273959342, "grad_norm": 1.5615925251729093, "learning_rate": 4.5497537669640404e-07, "loss": 0.5137, "step": 3423 }, { "epoch": 0.5524362697644402, "grad_norm": 1.8773675594018564, "learning_rate": 4.5470968769088915e-07, "loss": 0.6311, "step": 3424 }, { "epoch": 0.5525976121329461, "grad_norm": 1.7493125382666372, "learning_rate": 4.544440115788333e-07, "loss": 0.5143, "step": 3425 }, { "epoch": 0.5527589545014521, "grad_norm": 1.55390849317079, "learning_rate": 4.541783484358705e-07, "loss": 0.4337, "step": 3426 }, { "epoch": 0.552920296869958, "grad_norm": 1.4504878407349697, "learning_rate": 4.5391269833763105e-07, "loss": 0.4951, "step": 3427 }, { "epoch": 0.553081639238464, "grad_norm": 1.6648304917708456, "learning_rate": 4.536470613597414e-07, "loss": 0.6403, "step": 3428 }, { "epoch": 0.5532429816069699, "grad_norm": 1.9827230591137117, "learning_rate": 4.5338143757782425e-07, "loss": 0.5733, "step": 3429 }, { "epoch": 0.553404323975476, "grad_norm": 1.8424562343953332, "learning_rate": 4.5311582706749886e-07, "loss": 0.7496, "step": 3430 }, { "epoch": 0.553565666343982, "grad_norm": 1.7438456765455386, "learning_rate": 4.5285022990438027e-07, "loss": 0.5192, "step": 3431 }, { "epoch": 0.5537270087124879, "grad_norm": 1.2013431582019123, "learning_rate": 4.5258464616408e-07, "loss": 0.2841, "step": 3432 }, { "epoch": 0.5538883510809939, "grad_norm": 1.576620461161229, "learning_rate": 4.523190759222055e-07, "loss": 0.6085, "step": 3433 }, { "epoch": 0.5540496934494998, "grad_norm": 1.3209652161967527, "learning_rate": 4.5205351925436085e-07, "loss": 0.4193, "step": 3434 }, { "epoch": 0.5542110358180058, "grad_norm": 2.099562136096495, "learning_rate": 4.5178797623614586e-07, "loss": 0.6068, "step": 3435 }, { "epoch": 0.5543723781865117, "grad_norm": 1.5770383058248587, "learning_rate": 4.515224469431566e-07, "loss": 0.4839, "step": 3436 }, { "epoch": 0.5545337205550177, "grad_norm": 1.67643273412211, "learning_rate": 4.512569314509852e-07, "loss": 0.4114, "step": 3437 }, { "epoch": 0.5546950629235237, "grad_norm": 1.3354871113368387, "learning_rate": 4.5099142983521963e-07, "loss": 0.3834, "step": 3438 }, { "epoch": 0.5548564052920297, "grad_norm": 1.5605575694838496, "learning_rate": 4.507259421714445e-07, "loss": 0.4359, "step": 3439 }, { "epoch": 0.5550177476605357, "grad_norm": 1.6143984124719322, "learning_rate": 4.504604685352399e-07, "loss": 0.5574, "step": 3440 }, { "epoch": 0.5551790900290416, "grad_norm": 2.0145388493431184, "learning_rate": 4.501950090021821e-07, "loss": 0.5104, "step": 3441 }, { "epoch": 0.5553404323975476, "grad_norm": 1.6727063972834748, "learning_rate": 4.499295636478433e-07, "loss": 0.5111, "step": 3442 }, { "epoch": 0.5555017747660536, "grad_norm": 1.3762897597969905, "learning_rate": 4.4966413254779195e-07, "loss": 0.5019, "step": 3443 }, { "epoch": 0.5556631171345595, "grad_norm": 1.322209411958851, "learning_rate": 4.493987157775918e-07, "loss": 0.3886, "step": 3444 }, { "epoch": 0.5558244595030655, "grad_norm": 1.4119836268488806, "learning_rate": 4.4913331341280357e-07, "loss": 0.448, "step": 3445 }, { "epoch": 0.5559858018715714, "grad_norm": 1.9389784924601465, "learning_rate": 4.4886792552898283e-07, "loss": 0.4248, "step": 3446 }, { "epoch": 0.5561471442400775, "grad_norm": 1.9076772880909514, "learning_rate": 4.4860255220168164e-07, "loss": 0.6199, "step": 3447 }, { "epoch": 0.5563084866085835, "grad_norm": 1.6125106633743076, "learning_rate": 4.4833719350644744e-07, "loss": 0.5727, "step": 3448 }, { "epoch": 0.5564698289770894, "grad_norm": 1.6810264041649179, "learning_rate": 4.4807184951882414e-07, "loss": 0.5348, "step": 3449 }, { "epoch": 0.5566311713455954, "grad_norm": 1.687367893275524, "learning_rate": 4.4780652031435093e-07, "loss": 0.4565, "step": 3450 }, { "epoch": 0.5567925137141013, "grad_norm": 1.576801765062727, "learning_rate": 4.4754120596856304e-07, "loss": 0.4485, "step": 3451 }, { "epoch": 0.5569538560826073, "grad_norm": 1.6694113146089433, "learning_rate": 4.472759065569913e-07, "loss": 0.4105, "step": 3452 }, { "epoch": 0.5571151984511132, "grad_norm": 1.7682637439281563, "learning_rate": 4.470106221551624e-07, "loss": 0.5373, "step": 3453 }, { "epoch": 0.5572765408196192, "grad_norm": 1.6429831859264599, "learning_rate": 4.4674535283859896e-07, "loss": 0.6438, "step": 3454 }, { "epoch": 0.5574378831881251, "grad_norm": 1.3396956470367245, "learning_rate": 4.464800986828189e-07, "loss": 0.4685, "step": 3455 }, { "epoch": 0.5575992255566312, "grad_norm": 2.0393395948127893, "learning_rate": 4.4621485976333595e-07, "loss": 0.6081, "step": 3456 }, { "epoch": 0.5577605679251372, "grad_norm": 1.37798388703702, "learning_rate": 4.459496361556597e-07, "loss": 0.4843, "step": 3457 }, { "epoch": 0.5579219102936431, "grad_norm": 2.3203812405971327, "learning_rate": 4.4568442793529544e-07, "loss": 0.591, "step": 3458 }, { "epoch": 0.5580832526621491, "grad_norm": 1.5726952386411073, "learning_rate": 4.454192351777437e-07, "loss": 0.446, "step": 3459 }, { "epoch": 0.558244595030655, "grad_norm": 2.537845730835846, "learning_rate": 4.451540579585008e-07, "loss": 0.5562, "step": 3460 }, { "epoch": 0.558405937399161, "grad_norm": 1.3597181040660686, "learning_rate": 4.4488889635305845e-07, "loss": 0.3217, "step": 3461 }, { "epoch": 0.558567279767667, "grad_norm": 2.1520158410606047, "learning_rate": 4.446237504369045e-07, "loss": 0.3892, "step": 3462 }, { "epoch": 0.5587286221361729, "grad_norm": 1.71877392838901, "learning_rate": 4.443586202855217e-07, "loss": 0.5057, "step": 3463 }, { "epoch": 0.558889964504679, "grad_norm": 1.6627789055481677, "learning_rate": 4.4409350597438854e-07, "loss": 0.491, "step": 3464 }, { "epoch": 0.5590513068731849, "grad_norm": 1.968597285084866, "learning_rate": 4.43828407578979e-07, "loss": 0.5244, "step": 3465 }, { "epoch": 0.5592126492416909, "grad_norm": 1.5721021484305966, "learning_rate": 4.4356332517476267e-07, "loss": 0.4264, "step": 3466 }, { "epoch": 0.5593739916101969, "grad_norm": 1.556286779987847, "learning_rate": 4.432982588372042e-07, "loss": 0.368, "step": 3467 }, { "epoch": 0.5595353339787028, "grad_norm": 1.499070197255228, "learning_rate": 4.4303320864176426e-07, "loss": 0.467, "step": 3468 }, { "epoch": 0.5596966763472088, "grad_norm": 1.7542809848579641, "learning_rate": 4.427681746638984e-07, "loss": 0.5007, "step": 3469 }, { "epoch": 0.5598580187157147, "grad_norm": 3.5959170234874898, "learning_rate": 4.425031569790577e-07, "loss": 0.4674, "step": 3470 }, { "epoch": 0.5600193610842207, "grad_norm": 1.3850211484788613, "learning_rate": 4.422381556626888e-07, "loss": 0.4588, "step": 3471 }, { "epoch": 0.5601807034527266, "grad_norm": 1.2512025770409623, "learning_rate": 4.419731707902335e-07, "loss": 0.4645, "step": 3472 }, { "epoch": 0.5603420458212327, "grad_norm": 1.474496192883233, "learning_rate": 4.417082024371288e-07, "loss": 0.3665, "step": 3473 }, { "epoch": 0.5605033881897387, "grad_norm": 1.482796642994483, "learning_rate": 4.4144325067880707e-07, "loss": 0.6156, "step": 3474 }, { "epoch": 0.5606647305582446, "grad_norm": 1.5014904088270702, "learning_rate": 4.411783155906963e-07, "loss": 0.5341, "step": 3475 }, { "epoch": 0.5608260729267506, "grad_norm": 1.5949066303392565, "learning_rate": 4.409133972482193e-07, "loss": 0.4272, "step": 3476 }, { "epoch": 0.5609874152952565, "grad_norm": 1.8596743815489325, "learning_rate": 4.406484957267943e-07, "loss": 0.6024, "step": 3477 }, { "epoch": 0.5611487576637625, "grad_norm": 1.6706933810378264, "learning_rate": 4.4038361110183454e-07, "loss": 0.4658, "step": 3478 }, { "epoch": 0.5613101000322684, "grad_norm": 1.9505924743072232, "learning_rate": 4.401187434487487e-07, "loss": 0.5043, "step": 3479 }, { "epoch": 0.5614714424007744, "grad_norm": 2.2049503204908087, "learning_rate": 4.398538928429408e-07, "loss": 0.4608, "step": 3480 }, { "epoch": 0.5616327847692804, "grad_norm": 1.4709641831560862, "learning_rate": 4.395890593598096e-07, "loss": 0.5216, "step": 3481 }, { "epoch": 0.5617941271377864, "grad_norm": 1.8606087574427637, "learning_rate": 4.3932424307474915e-07, "loss": 0.5876, "step": 3482 }, { "epoch": 0.5619554695062924, "grad_norm": 1.4860341486902675, "learning_rate": 4.3905944406314843e-07, "loss": 0.6024, "step": 3483 }, { "epoch": 0.5621168118747983, "grad_norm": 1.629616488759365, "learning_rate": 4.3879466240039194e-07, "loss": 0.4905, "step": 3484 }, { "epoch": 0.5622781542433043, "grad_norm": 1.7032842299194977, "learning_rate": 4.385298981618588e-07, "loss": 0.4257, "step": 3485 }, { "epoch": 0.5624394966118103, "grad_norm": 1.2699843786435312, "learning_rate": 4.3826515142292334e-07, "loss": 0.4321, "step": 3486 }, { "epoch": 0.5626008389803162, "grad_norm": 1.6905379324050285, "learning_rate": 4.380004222589549e-07, "loss": 0.5293, "step": 3487 }, { "epoch": 0.5627621813488222, "grad_norm": 1.1985338473229328, "learning_rate": 4.377357107453179e-07, "loss": 0.4406, "step": 3488 }, { "epoch": 0.5629235237173281, "grad_norm": 1.5834455784948631, "learning_rate": 4.3747101695737163e-07, "loss": 0.4208, "step": 3489 }, { "epoch": 0.5630848660858342, "grad_norm": 1.753322512674242, "learning_rate": 4.372063409704702e-07, "loss": 0.588, "step": 3490 }, { "epoch": 0.5632462084543401, "grad_norm": 1.843500217393242, "learning_rate": 4.369416828599631e-07, "loss": 0.5188, "step": 3491 }, { "epoch": 0.5634075508228461, "grad_norm": 1.2744510572186711, "learning_rate": 4.3667704270119434e-07, "loss": 0.6117, "step": 3492 }, { "epoch": 0.5635688931913521, "grad_norm": 1.607198113293867, "learning_rate": 4.364124205695029e-07, "loss": 0.4506, "step": 3493 }, { "epoch": 0.563730235559858, "grad_norm": 1.6397494931913053, "learning_rate": 4.361478165402227e-07, "loss": 0.3946, "step": 3494 }, { "epoch": 0.563891577928364, "grad_norm": 1.0603966369435998, "learning_rate": 4.358832306886824e-07, "loss": 0.4398, "step": 3495 }, { "epoch": 0.5640529202968699, "grad_norm": 1.9178453704866423, "learning_rate": 4.356186630902054e-07, "loss": 0.575, "step": 3496 }, { "epoch": 0.5642142626653759, "grad_norm": 1.5667371899714309, "learning_rate": 4.3535411382011036e-07, "loss": 0.6115, "step": 3497 }, { "epoch": 0.5643756050338818, "grad_norm": 1.8664413301948304, "learning_rate": 4.350895829537102e-07, "loss": 0.4539, "step": 3498 }, { "epoch": 0.5645369474023879, "grad_norm": 2.14803063441476, "learning_rate": 4.348250705663129e-07, "loss": 0.651, "step": 3499 }, { "epoch": 0.5646982897708939, "grad_norm": 1.2207989708466147, "learning_rate": 4.34560576733221e-07, "loss": 0.4714, "step": 3500 }, { "epoch": 0.5648596321393998, "grad_norm": 1.405236960143752, "learning_rate": 4.342961015297319e-07, "loss": 0.3609, "step": 3501 }, { "epoch": 0.5650209745079058, "grad_norm": 1.9462577024881953, "learning_rate": 4.3403164503113753e-07, "loss": 0.5816, "step": 3502 }, { "epoch": 0.5651823168764117, "grad_norm": 1.7667840849645187, "learning_rate": 4.3376720731272497e-07, "loss": 0.4561, "step": 3503 }, { "epoch": 0.5653436592449177, "grad_norm": 1.6383092613172194, "learning_rate": 4.335027884497753e-07, "loss": 0.4233, "step": 3504 }, { "epoch": 0.5655050016134237, "grad_norm": 1.3238315972817032, "learning_rate": 4.3323838851756446e-07, "loss": 0.5111, "step": 3505 }, { "epoch": 0.5656663439819296, "grad_norm": 1.6787073868763351, "learning_rate": 4.3297400759136333e-07, "loss": 0.4686, "step": 3506 }, { "epoch": 0.5658276863504357, "grad_norm": 1.5731344345955633, "learning_rate": 4.3270964574643693e-07, "loss": 0.3635, "step": 3507 }, { "epoch": 0.5659890287189416, "grad_norm": 1.996513725636737, "learning_rate": 4.324453030580451e-07, "loss": 0.4401, "step": 3508 }, { "epoch": 0.5661503710874476, "grad_norm": 1.2475181737275485, "learning_rate": 4.32180979601442e-07, "loss": 0.3927, "step": 3509 }, { "epoch": 0.5663117134559535, "grad_norm": 1.577374345880402, "learning_rate": 4.3191667545187675e-07, "loss": 0.4266, "step": 3510 }, { "epoch": 0.5664730558244595, "grad_norm": 1.9595535683674574, "learning_rate": 4.3165239068459253e-07, "loss": 0.5055, "step": 3511 }, { "epoch": 0.5666343981929655, "grad_norm": 1.8736367674059817, "learning_rate": 4.313881253748272e-07, "loss": 0.6283, "step": 3512 }, { "epoch": 0.5667957405614714, "grad_norm": 1.630586192384594, "learning_rate": 4.311238795978129e-07, "loss": 0.7131, "step": 3513 }, { "epoch": 0.5669570829299774, "grad_norm": 1.776510860930638, "learning_rate": 4.308596534287766e-07, "loss": 0.5432, "step": 3514 }, { "epoch": 0.5671184252984833, "grad_norm": 1.511609213948893, "learning_rate": 4.3059544694293937e-07, "loss": 0.311, "step": 3515 }, { "epoch": 0.5672797676669894, "grad_norm": 1.5900746020923409, "learning_rate": 4.303312602155168e-07, "loss": 0.4954, "step": 3516 }, { "epoch": 0.5674411100354954, "grad_norm": 1.879063209116117, "learning_rate": 4.3006709332171876e-07, "loss": 0.5672, "step": 3517 }, { "epoch": 0.5676024524040013, "grad_norm": 1.4818195229985411, "learning_rate": 4.298029463367494e-07, "loss": 0.5329, "step": 3518 }, { "epoch": 0.5677637947725073, "grad_norm": 1.7606136129090253, "learning_rate": 4.2953881933580756e-07, "loss": 0.6197, "step": 3519 }, { "epoch": 0.5679251371410132, "grad_norm": 1.6286615055193472, "learning_rate": 4.29274712394086e-07, "loss": 0.4647, "step": 3520 }, { "epoch": 0.5680864795095192, "grad_norm": 1.9864451506133423, "learning_rate": 4.2901062558677206e-07, "loss": 0.6138, "step": 3521 }, { "epoch": 0.5682478218780251, "grad_norm": 1.6140039778640303, "learning_rate": 4.2874655898904695e-07, "loss": 0.4914, "step": 3522 }, { "epoch": 0.5684091642465311, "grad_norm": 1.5941824232892436, "learning_rate": 4.284825126760867e-07, "loss": 0.4149, "step": 3523 }, { "epoch": 0.5685705066150372, "grad_norm": 1.6387124676047211, "learning_rate": 4.2821848672306107e-07, "loss": 0.5912, "step": 3524 }, { "epoch": 0.5687318489835431, "grad_norm": 2.219301934163463, "learning_rate": 4.2795448120513404e-07, "loss": 0.6487, "step": 3525 }, { "epoch": 0.5688931913520491, "grad_norm": 1.1522130342472992, "learning_rate": 4.2769049619746444e-07, "loss": 0.3627, "step": 3526 }, { "epoch": 0.569054533720555, "grad_norm": 1.5162239071203953, "learning_rate": 4.2742653177520447e-07, "loss": 0.4566, "step": 3527 }, { "epoch": 0.569215876089061, "grad_norm": 1.4845300693826464, "learning_rate": 4.2716258801350063e-07, "loss": 0.427, "step": 3528 }, { "epoch": 0.569377218457567, "grad_norm": 1.8323766061578153, "learning_rate": 4.2689866498749395e-07, "loss": 0.6103, "step": 3529 }, { "epoch": 0.5695385608260729, "grad_norm": 1.6734313850754143, "learning_rate": 4.2663476277231915e-07, "loss": 0.532, "step": 3530 }, { "epoch": 0.5696999031945789, "grad_norm": 1.4974063065756924, "learning_rate": 4.2637088144310506e-07, "loss": 0.4589, "step": 3531 }, { "epoch": 0.5698612455630848, "grad_norm": 1.54301325094055, "learning_rate": 4.2610702107497485e-07, "loss": 0.4666, "step": 3532 }, { "epoch": 0.5700225879315909, "grad_norm": 1.6121625000001893, "learning_rate": 4.258431817430454e-07, "loss": 0.3974, "step": 3533 }, { "epoch": 0.5701839303000968, "grad_norm": 1.7390872758195548, "learning_rate": 4.255793635224278e-07, "loss": 0.5002, "step": 3534 }, { "epoch": 0.5703452726686028, "grad_norm": 1.2039409694331524, "learning_rate": 4.2531556648822694e-07, "loss": 0.5854, "step": 3535 }, { "epoch": 0.5705066150371088, "grad_norm": 1.6973265888252917, "learning_rate": 4.250517907155418e-07, "loss": 0.4262, "step": 3536 }, { "epoch": 0.5706679574056147, "grad_norm": 1.5280758403211516, "learning_rate": 4.2478803627946545e-07, "loss": 0.294, "step": 3537 }, { "epoch": 0.5708292997741207, "grad_norm": 1.60535016515268, "learning_rate": 4.245243032550848e-07, "loss": 0.4926, "step": 3538 }, { "epoch": 0.5709906421426266, "grad_norm": 1.6527214610587708, "learning_rate": 4.2426059171748046e-07, "loss": 0.5371, "step": 3539 }, { "epoch": 0.5711519845111326, "grad_norm": 2.021555610912719, "learning_rate": 4.2399690174172713e-07, "loss": 0.6306, "step": 3540 }, { "epoch": 0.5713133268796385, "grad_norm": 1.9885964134464036, "learning_rate": 4.237332334028932e-07, "loss": 0.4917, "step": 3541 }, { "epoch": 0.5714746692481446, "grad_norm": 1.2715177985748334, "learning_rate": 4.2346958677604116e-07, "loss": 0.4566, "step": 3542 }, { "epoch": 0.5716360116166506, "grad_norm": 1.4229223564931546, "learning_rate": 4.2320596193622716e-07, "loss": 0.5086, "step": 3543 }, { "epoch": 0.5717973539851565, "grad_norm": 1.4457893203646979, "learning_rate": 4.229423589585009e-07, "loss": 0.439, "step": 3544 }, { "epoch": 0.5719586963536625, "grad_norm": 1.5187324836882523, "learning_rate": 4.2267877791790656e-07, "loss": 0.4698, "step": 3545 }, { "epoch": 0.5721200387221684, "grad_norm": 1.3447507192620762, "learning_rate": 4.224152188894813e-07, "loss": 0.3801, "step": 3546 }, { "epoch": 0.5722813810906744, "grad_norm": 1.940861892369961, "learning_rate": 4.221516819482566e-07, "loss": 0.705, "step": 3547 }, { "epoch": 0.5724427234591803, "grad_norm": 1.6213847071582264, "learning_rate": 4.2188816716925687e-07, "loss": 0.328, "step": 3548 }, { "epoch": 0.5726040658276863, "grad_norm": 1.2775071999035303, "learning_rate": 4.2162467462750133e-07, "loss": 0.3799, "step": 3549 }, { "epoch": 0.5727654081961924, "grad_norm": 1.7702788064415194, "learning_rate": 4.2136120439800197e-07, "loss": 0.4265, "step": 3550 }, { "epoch": 0.5729267505646983, "grad_norm": 1.128115102877752, "learning_rate": 4.2109775655576494e-07, "loss": 0.4053, "step": 3551 }, { "epoch": 0.5730880929332043, "grad_norm": 1.2713952567984925, "learning_rate": 4.2083433117578965e-07, "loss": 0.3713, "step": 3552 }, { "epoch": 0.5732494353017102, "grad_norm": 1.7667895502262907, "learning_rate": 4.2057092833306933e-07, "loss": 0.6656, "step": 3553 }, { "epoch": 0.5734107776702162, "grad_norm": 1.340590244011634, "learning_rate": 4.203075481025906e-07, "loss": 0.3898, "step": 3554 }, { "epoch": 0.5735721200387222, "grad_norm": 1.5939054226356635, "learning_rate": 4.20044190559334e-07, "loss": 0.5117, "step": 3555 }, { "epoch": 0.5737334624072281, "grad_norm": 2.090462566275061, "learning_rate": 4.197808557782734e-07, "loss": 0.5807, "step": 3556 }, { "epoch": 0.5738948047757341, "grad_norm": 2.277114359326391, "learning_rate": 4.1951754383437597e-07, "loss": 0.6221, "step": 3557 }, { "epoch": 0.57405614714424, "grad_norm": 1.6656849433461636, "learning_rate": 4.192542548026026e-07, "loss": 0.6288, "step": 3558 }, { "epoch": 0.5742174895127461, "grad_norm": 1.2436302012347142, "learning_rate": 4.189909887579077e-07, "loss": 0.5828, "step": 3559 }, { "epoch": 0.574378831881252, "grad_norm": 1.9383742913237294, "learning_rate": 4.1872774577523935e-07, "loss": 0.4453, "step": 3560 }, { "epoch": 0.574540174249758, "grad_norm": 1.2413236385295354, "learning_rate": 4.1846452592953857e-07, "loss": 0.3763, "step": 3561 }, { "epoch": 0.574701516618264, "grad_norm": 1.5043650534173816, "learning_rate": 4.1820132929574005e-07, "loss": 0.4249, "step": 3562 }, { "epoch": 0.5748628589867699, "grad_norm": 2.014432332462501, "learning_rate": 4.1793815594877174e-07, "loss": 0.6033, "step": 3563 }, { "epoch": 0.5750242013552759, "grad_norm": 1.764675121179515, "learning_rate": 4.1767500596355525e-07, "loss": 0.5748, "step": 3564 }, { "epoch": 0.5751855437237818, "grad_norm": 1.5289673431930793, "learning_rate": 4.1741187941500534e-07, "loss": 0.5194, "step": 3565 }, { "epoch": 0.5753468860922878, "grad_norm": 1.3499651162620876, "learning_rate": 4.1714877637802996e-07, "loss": 0.6367, "step": 3566 }, { "epoch": 0.5755082284607939, "grad_norm": 1.6472361014933805, "learning_rate": 4.168856969275305e-07, "loss": 0.4309, "step": 3567 }, { "epoch": 0.5756695708292998, "grad_norm": 1.302714380571239, "learning_rate": 4.1662264113840193e-07, "loss": 0.437, "step": 3568 }, { "epoch": 0.5758309131978058, "grad_norm": 1.436969078538286, "learning_rate": 4.16359609085532e-07, "loss": 0.3295, "step": 3569 }, { "epoch": 0.5759922555663117, "grad_norm": 1.4417318994541128, "learning_rate": 4.1609660084380197e-07, "loss": 0.5346, "step": 3570 }, { "epoch": 0.5761535979348177, "grad_norm": 1.6618533805826785, "learning_rate": 4.1583361648808603e-07, "loss": 0.4478, "step": 3571 }, { "epoch": 0.5763149403033236, "grad_norm": 1.2791642680854967, "learning_rate": 4.1557065609325214e-07, "loss": 0.5657, "step": 3572 }, { "epoch": 0.5764762826718296, "grad_norm": 1.4976139960101642, "learning_rate": 4.1530771973416106e-07, "loss": 0.3549, "step": 3573 }, { "epoch": 0.5766376250403356, "grad_norm": 1.765075733191805, "learning_rate": 4.150448074856667e-07, "loss": 0.5507, "step": 3574 }, { "epoch": 0.5767989674088415, "grad_norm": 1.1290653736997556, "learning_rate": 4.1478191942261606e-07, "loss": 0.4522, "step": 3575 }, { "epoch": 0.5769603097773476, "grad_norm": 1.2445553937103848, "learning_rate": 4.1451905561984934e-07, "loss": 0.3924, "step": 3576 }, { "epoch": 0.5771216521458535, "grad_norm": 1.564834757725232, "learning_rate": 4.1425621615220006e-07, "loss": 0.6158, "step": 3577 }, { "epoch": 0.5772829945143595, "grad_norm": 1.405590792961015, "learning_rate": 4.1399340109449443e-07, "loss": 0.3321, "step": 3578 }, { "epoch": 0.5774443368828654, "grad_norm": 1.7971731187393947, "learning_rate": 4.137306105215519e-07, "loss": 0.5572, "step": 3579 }, { "epoch": 0.5776056792513714, "grad_norm": 1.583913705098344, "learning_rate": 4.1346784450818485e-07, "loss": 0.347, "step": 3580 }, { "epoch": 0.5777670216198774, "grad_norm": 1.6960608592944966, "learning_rate": 4.132051031291989e-07, "loss": 0.5318, "step": 3581 }, { "epoch": 0.5779283639883833, "grad_norm": 1.7969116870617885, "learning_rate": 4.129423864593922e-07, "loss": 0.6631, "step": 3582 }, { "epoch": 0.5780897063568893, "grad_norm": 1.4195664497508311, "learning_rate": 4.126796945735567e-07, "loss": 0.4911, "step": 3583 }, { "epoch": 0.5782510487253952, "grad_norm": 1.328139271378976, "learning_rate": 4.1241702754647633e-07, "loss": 0.3412, "step": 3584 }, { "epoch": 0.5784123910939013, "grad_norm": 1.789681007049756, "learning_rate": 4.1215438545292844e-07, "loss": 0.6133, "step": 3585 }, { "epoch": 0.5785737334624073, "grad_norm": 1.402534620636309, "learning_rate": 4.1189176836768337e-07, "loss": 0.4554, "step": 3586 }, { "epoch": 0.5787350758309132, "grad_norm": 1.6553742594663627, "learning_rate": 4.116291763655041e-07, "loss": 0.488, "step": 3587 }, { "epoch": 0.5788964181994192, "grad_norm": 2.021419977461577, "learning_rate": 4.1136660952114654e-07, "loss": 0.5261, "step": 3588 }, { "epoch": 0.5790577605679251, "grad_norm": 1.3542665982664928, "learning_rate": 4.111040679093594e-07, "loss": 0.4471, "step": 3589 }, { "epoch": 0.5792191029364311, "grad_norm": 1.381720289371629, "learning_rate": 4.1084155160488445e-07, "loss": 0.4459, "step": 3590 }, { "epoch": 0.579380445304937, "grad_norm": 2.609202122243825, "learning_rate": 4.1057906068245587e-07, "loss": 0.6089, "step": 3591 }, { "epoch": 0.579541787673443, "grad_norm": 2.0010170734674166, "learning_rate": 4.1031659521680086e-07, "loss": 0.6438, "step": 3592 }, { "epoch": 0.5797031300419491, "grad_norm": 1.8180892530633617, "learning_rate": 4.1005415528263926e-07, "loss": 0.4827, "step": 3593 }, { "epoch": 0.579864472410455, "grad_norm": 1.5888125662860846, "learning_rate": 4.097917409546838e-07, "loss": 0.409, "step": 3594 }, { "epoch": 0.580025814778961, "grad_norm": 2.0461574781936083, "learning_rate": 4.0952935230763985e-07, "loss": 0.6434, "step": 3595 }, { "epoch": 0.5801871571474669, "grad_norm": 1.3349463372035582, "learning_rate": 4.0926698941620546e-07, "loss": 0.5217, "step": 3596 }, { "epoch": 0.5803484995159729, "grad_norm": 1.9372628589832983, "learning_rate": 4.090046523550713e-07, "loss": 0.5698, "step": 3597 }, { "epoch": 0.5805098418844788, "grad_norm": 2.6361445048871985, "learning_rate": 4.0874234119892057e-07, "loss": 0.4486, "step": 3598 }, { "epoch": 0.5806711842529848, "grad_norm": 2.0580070571630125, "learning_rate": 4.0848005602242955e-07, "loss": 0.4604, "step": 3599 }, { "epoch": 0.5808325266214908, "grad_norm": 1.70030405747418, "learning_rate": 4.0821779690026657e-07, "loss": 0.3512, "step": 3600 }, { "epoch": 0.5809938689899967, "grad_norm": 1.6076388577634488, "learning_rate": 4.0795556390709283e-07, "loss": 0.6071, "step": 3601 }, { "epoch": 0.5811552113585028, "grad_norm": 1.3692058814328936, "learning_rate": 4.0769335711756205e-07, "loss": 0.4897, "step": 3602 }, { "epoch": 0.5813165537270087, "grad_norm": 1.5893145156014095, "learning_rate": 4.074311766063206e-07, "loss": 0.5085, "step": 3603 }, { "epoch": 0.5814778960955147, "grad_norm": 1.0537831137670577, "learning_rate": 4.071690224480072e-07, "loss": 0.3903, "step": 3604 }, { "epoch": 0.5816392384640207, "grad_norm": 1.5551233495380494, "learning_rate": 4.069068947172529e-07, "loss": 0.6037, "step": 3605 }, { "epoch": 0.5818005808325266, "grad_norm": 1.848719865348303, "learning_rate": 4.0664479348868186e-07, "loss": 0.5297, "step": 3606 }, { "epoch": 0.5819619232010326, "grad_norm": 1.2192880958704986, "learning_rate": 4.063827188369101e-07, "loss": 0.4848, "step": 3607 }, { "epoch": 0.5821232655695385, "grad_norm": 1.8756274127277337, "learning_rate": 4.061206708365462e-07, "loss": 0.5995, "step": 3608 }, { "epoch": 0.5822846079380445, "grad_norm": 1.330401724327149, "learning_rate": 4.058586495621913e-07, "loss": 0.6215, "step": 3609 }, { "epoch": 0.5824459503065506, "grad_norm": 1.7277813229962304, "learning_rate": 4.055966550884389e-07, "loss": 0.5142, "step": 3610 }, { "epoch": 0.5826072926750565, "grad_norm": 1.5794437299596997, "learning_rate": 4.0533468748987457e-07, "loss": 0.4536, "step": 3611 }, { "epoch": 0.5827686350435625, "grad_norm": 1.2898105647105897, "learning_rate": 4.0507274684107685e-07, "loss": 0.5757, "step": 3612 }, { "epoch": 0.5829299774120684, "grad_norm": 2.0875742916204607, "learning_rate": 4.04810833216616e-07, "loss": 0.6091, "step": 3613 }, { "epoch": 0.5830913197805744, "grad_norm": 2.127319864789709, "learning_rate": 4.045489466910549e-07, "loss": 0.6137, "step": 3614 }, { "epoch": 0.5832526621490803, "grad_norm": 1.5884384955583748, "learning_rate": 4.0428708733894844e-07, "loss": 0.5845, "step": 3615 }, { "epoch": 0.5834140045175863, "grad_norm": 1.8188114116564902, "learning_rate": 4.040252552348443e-07, "loss": 0.4918, "step": 3616 }, { "epoch": 0.5835753468860922, "grad_norm": 2.188060906568478, "learning_rate": 4.0376345045328167e-07, "loss": 0.4927, "step": 3617 }, { "epoch": 0.5837366892545982, "grad_norm": 1.6286571870299715, "learning_rate": 4.035016730687928e-07, "loss": 0.3745, "step": 3618 }, { "epoch": 0.5838980316231043, "grad_norm": 1.4930008673694133, "learning_rate": 4.0323992315590154e-07, "loss": 0.5226, "step": 3619 }, { "epoch": 0.5840593739916102, "grad_norm": 1.8314743659672637, "learning_rate": 4.02978200789124e-07, "loss": 0.4071, "step": 3620 }, { "epoch": 0.5842207163601162, "grad_norm": 1.4567319342673204, "learning_rate": 4.027165060429686e-07, "loss": 0.4359, "step": 3621 }, { "epoch": 0.5843820587286221, "grad_norm": 1.6281057169938344, "learning_rate": 4.0245483899193586e-07, "loss": 0.4079, "step": 3622 }, { "epoch": 0.5845434010971281, "grad_norm": 1.602118642126068, "learning_rate": 4.021931997105184e-07, "loss": 0.4484, "step": 3623 }, { "epoch": 0.5847047434656341, "grad_norm": 1.6622893149743387, "learning_rate": 4.019315882732007e-07, "loss": 0.4474, "step": 3624 }, { "epoch": 0.58486608583414, "grad_norm": 1.1170875664677866, "learning_rate": 4.016700047544599e-07, "loss": 0.4179, "step": 3625 }, { "epoch": 0.585027428202646, "grad_norm": 1.2759373940143826, "learning_rate": 4.0140844922876457e-07, "loss": 0.5122, "step": 3626 }, { "epoch": 0.585188770571152, "grad_norm": 2.175760210849805, "learning_rate": 4.0114692177057564e-07, "loss": 0.5167, "step": 3627 }, { "epoch": 0.585350112939658, "grad_norm": 1.4428052445052233, "learning_rate": 4.008854224543458e-07, "loss": 0.4124, "step": 3628 }, { "epoch": 0.585511455308164, "grad_norm": 1.3558448467613762, "learning_rate": 4.006239513545202e-07, "loss": 0.4068, "step": 3629 }, { "epoch": 0.5856727976766699, "grad_norm": 1.7507368307679199, "learning_rate": 4.003625085455355e-07, "loss": 0.6274, "step": 3630 }, { "epoch": 0.5858341400451759, "grad_norm": 1.3844752686572703, "learning_rate": 4.001010941018206e-07, "loss": 0.6144, "step": 3631 }, { "epoch": 0.5859954824136818, "grad_norm": 1.5505914451846303, "learning_rate": 3.9983970809779605e-07, "loss": 0.4998, "step": 3632 }, { "epoch": 0.5861568247821878, "grad_norm": 1.2384027848635024, "learning_rate": 3.9957835060787455e-07, "loss": 0.4057, "step": 3633 }, { "epoch": 0.5863181671506937, "grad_norm": 1.223573383370776, "learning_rate": 3.993170217064604e-07, "loss": 0.4454, "step": 3634 }, { "epoch": 0.5864795095191997, "grad_norm": 1.5537216023115672, "learning_rate": 3.990557214679502e-07, "loss": 0.5338, "step": 3635 }, { "epoch": 0.5866408518877058, "grad_norm": 1.722449534636149, "learning_rate": 3.9879444996673197e-07, "loss": 0.5806, "step": 3636 }, { "epoch": 0.5868021942562117, "grad_norm": 1.6646401562339845, "learning_rate": 3.985332072771856e-07, "loss": 0.4647, "step": 3637 }, { "epoch": 0.5869635366247177, "grad_norm": 2.3262838982065217, "learning_rate": 3.9827199347368317e-07, "loss": 0.6227, "step": 3638 }, { "epoch": 0.5871248789932236, "grad_norm": 1.9447971871748386, "learning_rate": 3.980108086305881e-07, "loss": 0.6161, "step": 3639 }, { "epoch": 0.5872862213617296, "grad_norm": 1.4251579699042203, "learning_rate": 3.9774965282225547e-07, "loss": 0.4973, "step": 3640 }, { "epoch": 0.5874475637302355, "grad_norm": 1.321845208952841, "learning_rate": 3.974885261230329e-07, "loss": 0.4248, "step": 3641 }, { "epoch": 0.5876089060987415, "grad_norm": 1.386353866497243, "learning_rate": 3.9722742860725884e-07, "loss": 0.3606, "step": 3642 }, { "epoch": 0.5877702484672475, "grad_norm": 1.3598025734738073, "learning_rate": 3.969663603492637e-07, "loss": 0.4519, "step": 3643 }, { "epoch": 0.5879315908357534, "grad_norm": 1.5739678497666332, "learning_rate": 3.9670532142336976e-07, "loss": 0.4329, "step": 3644 }, { "epoch": 0.5880929332042595, "grad_norm": 1.5092920347675491, "learning_rate": 3.9644431190389076e-07, "loss": 0.4342, "step": 3645 }, { "epoch": 0.5882542755727654, "grad_norm": 1.8251578223560745, "learning_rate": 3.9618333186513214e-07, "loss": 0.598, "step": 3646 }, { "epoch": 0.5884156179412714, "grad_norm": 1.526660346777438, "learning_rate": 3.959223813813908e-07, "loss": 0.6291, "step": 3647 }, { "epoch": 0.5885769603097774, "grad_norm": 1.6133586135307556, "learning_rate": 3.9566146052695554e-07, "loss": 0.5126, "step": 3648 }, { "epoch": 0.5887383026782833, "grad_norm": 2.227025569479497, "learning_rate": 3.9540056937610646e-07, "loss": 0.6732, "step": 3649 }, { "epoch": 0.5888996450467893, "grad_norm": 1.5955051406686682, "learning_rate": 3.9513970800311515e-07, "loss": 0.5954, "step": 3650 }, { "epoch": 0.5890609874152952, "grad_norm": 1.294681532521713, "learning_rate": 3.948788764822449e-07, "loss": 0.4292, "step": 3651 }, { "epoch": 0.5892223297838012, "grad_norm": 1.714803430174486, "learning_rate": 3.9461807488775064e-07, "loss": 0.4475, "step": 3652 }, { "epoch": 0.5893836721523072, "grad_norm": 2.759278769451388, "learning_rate": 3.943573032938785e-07, "loss": 0.7444, "step": 3653 }, { "epoch": 0.5895450145208132, "grad_norm": 1.4270693460414792, "learning_rate": 3.9409656177486614e-07, "loss": 0.5741, "step": 3654 }, { "epoch": 0.5897063568893192, "grad_norm": 1.5583238869991027, "learning_rate": 3.9383585040494275e-07, "loss": 0.4831, "step": 3655 }, { "epoch": 0.5898676992578251, "grad_norm": 1.3665885703328087, "learning_rate": 3.935751692583287e-07, "loss": 0.3751, "step": 3656 }, { "epoch": 0.5900290416263311, "grad_norm": 1.6813751259227239, "learning_rate": 3.933145184092361e-07, "loss": 0.49, "step": 3657 }, { "epoch": 0.590190383994837, "grad_norm": 1.5150867568250959, "learning_rate": 3.9305389793186816e-07, "loss": 0.505, "step": 3658 }, { "epoch": 0.590351726363343, "grad_norm": 1.2163872434960141, "learning_rate": 3.9279330790041957e-07, "loss": 0.491, "step": 3659 }, { "epoch": 0.590513068731849, "grad_norm": 1.7907767266506858, "learning_rate": 3.9253274838907627e-07, "loss": 0.5155, "step": 3660 }, { "epoch": 0.5906744111003549, "grad_norm": 1.3950968645162445, "learning_rate": 3.9227221947201573e-07, "loss": 0.4045, "step": 3661 }, { "epoch": 0.590835753468861, "grad_norm": 1.360611189078669, "learning_rate": 3.9201172122340645e-07, "loss": 0.3787, "step": 3662 }, { "epoch": 0.5909970958373669, "grad_norm": 1.5537914204926626, "learning_rate": 3.91751253717408e-07, "loss": 0.3546, "step": 3663 }, { "epoch": 0.5911584382058729, "grad_norm": 2.16049936615101, "learning_rate": 3.91490817028172e-07, "loss": 0.588, "step": 3664 }, { "epoch": 0.5913197805743788, "grad_norm": 1.1920299960944987, "learning_rate": 3.9123041122984054e-07, "loss": 0.4474, "step": 3665 }, { "epoch": 0.5914811229428848, "grad_norm": 1.1112610272095336, "learning_rate": 3.9097003639654725e-07, "loss": 0.3397, "step": 3666 }, { "epoch": 0.5916424653113908, "grad_norm": 3.099375378838878, "learning_rate": 3.907096926024168e-07, "loss": 0.5358, "step": 3667 }, { "epoch": 0.5918038076798967, "grad_norm": 2.7688397188482896, "learning_rate": 3.904493799215651e-07, "loss": 0.7017, "step": 3668 }, { "epoch": 0.5919651500484027, "grad_norm": 1.6764886958932605, "learning_rate": 3.9018909842809917e-07, "loss": 0.4971, "step": 3669 }, { "epoch": 0.5921264924169087, "grad_norm": 1.6708311583519888, "learning_rate": 3.8992884819611725e-07, "loss": 0.5103, "step": 3670 }, { "epoch": 0.5922878347854147, "grad_norm": 1.7851481030568337, "learning_rate": 3.896686292997085e-07, "loss": 0.6872, "step": 3671 }, { "epoch": 0.5924491771539206, "grad_norm": 2.4344836425043686, "learning_rate": 3.8940844181295337e-07, "loss": 0.7124, "step": 3672 }, { "epoch": 0.5926105195224266, "grad_norm": 1.4995941566754345, "learning_rate": 3.89148285809923e-07, "loss": 0.4983, "step": 3673 }, { "epoch": 0.5927718618909326, "grad_norm": 1.8154258789547866, "learning_rate": 3.888881613646799e-07, "loss": 0.6105, "step": 3674 }, { "epoch": 0.5929332042594385, "grad_norm": 1.3466430836129666, "learning_rate": 3.886280685512779e-07, "loss": 0.408, "step": 3675 }, { "epoch": 0.5930945466279445, "grad_norm": 1.4961313586575207, "learning_rate": 3.8836800744376114e-07, "loss": 0.5029, "step": 3676 }, { "epoch": 0.5932558889964504, "grad_norm": 3.1904918616722675, "learning_rate": 3.881079781161651e-07, "loss": 0.6046, "step": 3677 }, { "epoch": 0.5934172313649564, "grad_norm": 1.4315362036121186, "learning_rate": 3.8784798064251594e-07, "loss": 0.3937, "step": 3678 }, { "epoch": 0.5935785737334625, "grad_norm": 1.1824347735716867, "learning_rate": 3.8758801509683137e-07, "loss": 0.3284, "step": 3679 }, { "epoch": 0.5937399161019684, "grad_norm": 1.4672032592057083, "learning_rate": 3.873280815531193e-07, "loss": 0.4853, "step": 3680 }, { "epoch": 0.5939012584704744, "grad_norm": 1.8356587299784692, "learning_rate": 3.8706818008537896e-07, "loss": 0.6956, "step": 3681 }, { "epoch": 0.5940626008389803, "grad_norm": 1.884861697579771, "learning_rate": 3.868083107676001e-07, "loss": 0.6104, "step": 3682 }, { "epoch": 0.5942239432074863, "grad_norm": 1.6946753082315178, "learning_rate": 3.86548473673764e-07, "loss": 0.3683, "step": 3683 }, { "epoch": 0.5943852855759922, "grad_norm": 1.2810710688999216, "learning_rate": 3.8628866887784185e-07, "loss": 0.4535, "step": 3684 }, { "epoch": 0.5945466279444982, "grad_norm": 1.9518269612425434, "learning_rate": 3.860288964537963e-07, "loss": 0.5246, "step": 3685 }, { "epoch": 0.5947079703130042, "grad_norm": 2.007656581183029, "learning_rate": 3.8576915647558047e-07, "loss": 0.713, "step": 3686 }, { "epoch": 0.5948693126815101, "grad_norm": 1.788291531821141, "learning_rate": 3.8550944901713847e-07, "loss": 0.431, "step": 3687 }, { "epoch": 0.5950306550500162, "grad_norm": 1.2903293742108704, "learning_rate": 3.852497741524051e-07, "loss": 0.5281, "step": 3688 }, { "epoch": 0.5951919974185221, "grad_norm": 1.6394911708239222, "learning_rate": 3.849901319553058e-07, "loss": 0.4181, "step": 3689 }, { "epoch": 0.5953533397870281, "grad_norm": 1.3495775762805984, "learning_rate": 3.847305224997566e-07, "loss": 0.2998, "step": 3690 }, { "epoch": 0.595514682155534, "grad_norm": 1.3016559764180489, "learning_rate": 3.844709458596644e-07, "loss": 0.4051, "step": 3691 }, { "epoch": 0.59567602452404, "grad_norm": 1.657489744474004, "learning_rate": 3.8421140210892677e-07, "loss": 0.4712, "step": 3692 }, { "epoch": 0.595837366892546, "grad_norm": 1.4302115391942838, "learning_rate": 3.839518913214319e-07, "loss": 0.4341, "step": 3693 }, { "epoch": 0.5959987092610519, "grad_norm": 2.0418729742744794, "learning_rate": 3.836924135710583e-07, "loss": 0.5794, "step": 3694 }, { "epoch": 0.5961600516295579, "grad_norm": 2.0063839591393515, "learning_rate": 3.8343296893167537e-07, "loss": 0.6208, "step": 3695 }, { "epoch": 0.5963213939980639, "grad_norm": 1.6314034108485316, "learning_rate": 3.831735574771432e-07, "loss": 0.4343, "step": 3696 }, { "epoch": 0.5964827363665699, "grad_norm": 1.3397381799329144, "learning_rate": 3.82914179281312e-07, "loss": 0.5085, "step": 3697 }, { "epoch": 0.5966440787350759, "grad_norm": 1.3055523501462794, "learning_rate": 3.8265483441802316e-07, "loss": 0.3605, "step": 3698 }, { "epoch": 0.5968054211035818, "grad_norm": 2.0397392699105166, "learning_rate": 3.82395522961108e-07, "loss": 0.6211, "step": 3699 }, { "epoch": 0.5969667634720878, "grad_norm": 1.4144241966359226, "learning_rate": 3.8213624498438843e-07, "loss": 0.4808, "step": 3700 }, { "epoch": 0.5971281058405937, "grad_norm": 1.3548456348313251, "learning_rate": 3.818770005616769e-07, "loss": 0.4617, "step": 3701 }, { "epoch": 0.5972894482090997, "grad_norm": 1.5207855617736814, "learning_rate": 3.816177897667766e-07, "loss": 0.3263, "step": 3702 }, { "epoch": 0.5974507905776056, "grad_norm": 1.9604833920589433, "learning_rate": 3.8135861267348073e-07, "loss": 0.4247, "step": 3703 }, { "epoch": 0.5976121329461116, "grad_norm": 1.3593452274964573, "learning_rate": 3.810994693555729e-07, "loss": 0.3582, "step": 3704 }, { "epoch": 0.5977734753146177, "grad_norm": 1.3467007553079207, "learning_rate": 3.8084035988682745e-07, "loss": 0.3732, "step": 3705 }, { "epoch": 0.5979348176831236, "grad_norm": 1.9178229935029378, "learning_rate": 3.8058128434100883e-07, "loss": 0.4748, "step": 3706 }, { "epoch": 0.5980961600516296, "grad_norm": 2.096144602155527, "learning_rate": 3.8032224279187187e-07, "loss": 0.4307, "step": 3707 }, { "epoch": 0.5982575024201355, "grad_norm": 1.8057999258056787, "learning_rate": 3.800632353131616e-07, "loss": 0.3871, "step": 3708 }, { "epoch": 0.5984188447886415, "grad_norm": 1.8279922306722065, "learning_rate": 3.798042619786136e-07, "loss": 0.597, "step": 3709 }, { "epoch": 0.5985801871571474, "grad_norm": 1.7131277896673291, "learning_rate": 3.795453228619536e-07, "loss": 0.4702, "step": 3710 }, { "epoch": 0.5987415295256534, "grad_norm": 1.5686970462448568, "learning_rate": 3.7928641803689785e-07, "loss": 0.4225, "step": 3711 }, { "epoch": 0.5989028718941594, "grad_norm": 1.7115760900470869, "learning_rate": 3.7902754757715234e-07, "loss": 0.5963, "step": 3712 }, { "epoch": 0.5990642142626654, "grad_norm": 1.269407342586074, "learning_rate": 3.7876871155641354e-07, "loss": 0.4073, "step": 3713 }, { "epoch": 0.5992255566311714, "grad_norm": 1.1871963915952561, "learning_rate": 3.785099100483681e-07, "loss": 0.4204, "step": 3714 }, { "epoch": 0.5993868989996773, "grad_norm": 2.012240266328754, "learning_rate": 3.782511431266929e-07, "loss": 0.5601, "step": 3715 }, { "epoch": 0.5995482413681833, "grad_norm": 1.3316030848955818, "learning_rate": 3.7799241086505497e-07, "loss": 0.5062, "step": 3716 }, { "epoch": 0.5997095837366893, "grad_norm": 1.3028681055885885, "learning_rate": 3.7773371333711123e-07, "loss": 0.4024, "step": 3717 }, { "epoch": 0.5998709261051952, "grad_norm": 1.5514925999564828, "learning_rate": 3.774750506165091e-07, "loss": 0.6393, "step": 3718 }, { "epoch": 0.6000322684737012, "grad_norm": 1.9184276396999311, "learning_rate": 3.77216422776886e-07, "loss": 0.4471, "step": 3719 }, { "epoch": 0.6001936108422071, "grad_norm": 1.5112049895620696, "learning_rate": 3.7695782989186886e-07, "loss": 0.4086, "step": 3720 }, { "epoch": 0.6003549532107131, "grad_norm": 1.6692265005270093, "learning_rate": 3.766992720350756e-07, "loss": 0.5818, "step": 3721 }, { "epoch": 0.6005162955792192, "grad_norm": 2.4630883883911068, "learning_rate": 3.764407492801136e-07, "loss": 0.6206, "step": 3722 }, { "epoch": 0.6006776379477251, "grad_norm": 1.525279100861712, "learning_rate": 3.7618226170058007e-07, "loss": 0.6219, "step": 3723 }, { "epoch": 0.6008389803162311, "grad_norm": 1.6078094719653444, "learning_rate": 3.7592380937006287e-07, "loss": 0.5315, "step": 3724 }, { "epoch": 0.601000322684737, "grad_norm": 1.7372164433129074, "learning_rate": 3.756653923621391e-07, "loss": 0.5247, "step": 3725 }, { "epoch": 0.601161665053243, "grad_norm": 1.3114141332052385, "learning_rate": 3.754070107503762e-07, "loss": 0.383, "step": 3726 }, { "epoch": 0.6013230074217489, "grad_norm": 1.7743166454322792, "learning_rate": 3.751486646083315e-07, "loss": 0.5817, "step": 3727 }, { "epoch": 0.6014843497902549, "grad_norm": 1.8724436181292936, "learning_rate": 3.748903540095522e-07, "loss": 0.5278, "step": 3728 }, { "epoch": 0.6016456921587608, "grad_norm": 1.9308618223316412, "learning_rate": 3.746320790275754e-07, "loss": 0.5466, "step": 3729 }, { "epoch": 0.6018070345272669, "grad_norm": 2.3643376787160886, "learning_rate": 3.743738397359279e-07, "loss": 0.7202, "step": 3730 }, { "epoch": 0.6019683768957729, "grad_norm": 1.5938332479837845, "learning_rate": 3.741156362081267e-07, "loss": 0.5387, "step": 3731 }, { "epoch": 0.6021297192642788, "grad_norm": 1.932213184445721, "learning_rate": 3.73857468517678e-07, "loss": 0.7214, "step": 3732 }, { "epoch": 0.6022910616327848, "grad_norm": 2.685326517948163, "learning_rate": 3.7359933673807877e-07, "loss": 0.5714, "step": 3733 }, { "epoch": 0.6024524040012907, "grad_norm": 4.20703170337161, "learning_rate": 3.733412409428148e-07, "loss": 0.6309, "step": 3734 }, { "epoch": 0.6026137463697967, "grad_norm": 1.2851937239264823, "learning_rate": 3.730831812053621e-07, "loss": 0.337, "step": 3735 }, { "epoch": 0.6027750887383027, "grad_norm": 1.732256239549628, "learning_rate": 3.728251575991862e-07, "loss": 0.5915, "step": 3736 }, { "epoch": 0.6029364311068086, "grad_norm": 2.07220993901971, "learning_rate": 3.7256717019774276e-07, "loss": 0.43, "step": 3737 }, { "epoch": 0.6030977734753146, "grad_norm": 1.3140805128679582, "learning_rate": 3.723092190744766e-07, "loss": 0.4032, "step": 3738 }, { "epoch": 0.6032591158438206, "grad_norm": 1.8016558527849844, "learning_rate": 3.720513043028226e-07, "loss": 0.6397, "step": 3739 }, { "epoch": 0.6034204582123266, "grad_norm": 1.945133001786269, "learning_rate": 3.7179342595620496e-07, "loss": 0.5448, "step": 3740 }, { "epoch": 0.6035818005808326, "grad_norm": 1.7344858632439444, "learning_rate": 3.715355841080379e-07, "loss": 0.6145, "step": 3741 }, { "epoch": 0.6037431429493385, "grad_norm": 1.512260162632162, "learning_rate": 3.7127777883172506e-07, "loss": 0.5265, "step": 3742 }, { "epoch": 0.6039044853178445, "grad_norm": 1.25568152513583, "learning_rate": 3.7102001020065933e-07, "loss": 0.4333, "step": 3743 }, { "epoch": 0.6040658276863504, "grad_norm": 1.762911914821139, "learning_rate": 3.7076227828822395e-07, "loss": 0.5259, "step": 3744 }, { "epoch": 0.6042271700548564, "grad_norm": 1.6258060950196076, "learning_rate": 3.7050458316779086e-07, "loss": 0.4793, "step": 3745 }, { "epoch": 0.6043885124233623, "grad_norm": 1.434633714535596, "learning_rate": 3.7024692491272235e-07, "loss": 0.3577, "step": 3746 }, { "epoch": 0.6045498547918683, "grad_norm": 1.293658300273616, "learning_rate": 3.699893035963695e-07, "loss": 0.463, "step": 3747 }, { "epoch": 0.6047111971603744, "grad_norm": 1.6402745917182806, "learning_rate": 3.6973171929207316e-07, "loss": 0.5649, "step": 3748 }, { "epoch": 0.6048725395288803, "grad_norm": 1.243358372128508, "learning_rate": 3.6947417207316356e-07, "loss": 0.5305, "step": 3749 }, { "epoch": 0.6050338818973863, "grad_norm": 1.5048969127036544, "learning_rate": 3.6921666201296075e-07, "loss": 0.5523, "step": 3750 }, { "epoch": 0.6051952242658922, "grad_norm": 2.038077748770684, "learning_rate": 3.6895918918477363e-07, "loss": 0.5062, "step": 3751 }, { "epoch": 0.6053565666343982, "grad_norm": 1.3272237524762989, "learning_rate": 3.6870175366190095e-07, "loss": 0.4548, "step": 3752 }, { "epoch": 0.6055179090029041, "grad_norm": 1.9513581485823397, "learning_rate": 3.6844435551763047e-07, "loss": 0.4829, "step": 3753 }, { "epoch": 0.6056792513714101, "grad_norm": 1.7210857818876206, "learning_rate": 3.6818699482523976e-07, "loss": 0.5079, "step": 3754 }, { "epoch": 0.6058405937399161, "grad_norm": 1.6250850948648068, "learning_rate": 3.6792967165799506e-07, "loss": 0.403, "step": 3755 }, { "epoch": 0.6060019361084221, "grad_norm": 1.4298657582368945, "learning_rate": 3.6767238608915285e-07, "loss": 0.5651, "step": 3756 }, { "epoch": 0.6061632784769281, "grad_norm": 1.5357160251392132, "learning_rate": 3.674151381919582e-07, "loss": 0.4808, "step": 3757 }, { "epoch": 0.606324620845434, "grad_norm": 1.3616710871169868, "learning_rate": 3.6715792803964544e-07, "loss": 0.4952, "step": 3758 }, { "epoch": 0.60648596321394, "grad_norm": 1.635096727245621, "learning_rate": 3.669007557054387e-07, "loss": 0.4822, "step": 3759 }, { "epoch": 0.606647305582446, "grad_norm": 1.76094616783737, "learning_rate": 3.6664362126255084e-07, "loss": 0.5398, "step": 3760 }, { "epoch": 0.6068086479509519, "grad_norm": 1.7467601622365994, "learning_rate": 3.663865247841841e-07, "loss": 0.6834, "step": 3761 }, { "epoch": 0.6069699903194579, "grad_norm": 1.2463363841736121, "learning_rate": 3.661294663435298e-07, "loss": 0.4251, "step": 3762 }, { "epoch": 0.6071313326879638, "grad_norm": 2.103109024690034, "learning_rate": 3.658724460137688e-07, "loss": 0.4994, "step": 3763 }, { "epoch": 0.6072926750564698, "grad_norm": 1.6236913253132386, "learning_rate": 3.656154638680707e-07, "loss": 0.4488, "step": 3764 }, { "epoch": 0.6074540174249758, "grad_norm": 1.93213014021789, "learning_rate": 3.653585199795945e-07, "loss": 0.541, "step": 3765 }, { "epoch": 0.6076153597934818, "grad_norm": 1.1094278134951496, "learning_rate": 3.651016144214878e-07, "loss": 0.4025, "step": 3766 }, { "epoch": 0.6077767021619878, "grad_norm": 1.8232376760581235, "learning_rate": 3.6484474726688816e-07, "loss": 0.5079, "step": 3767 }, { "epoch": 0.6079380445304937, "grad_norm": 1.8311916615434203, "learning_rate": 3.645879185889217e-07, "loss": 0.6132, "step": 3768 }, { "epoch": 0.6080993868989997, "grad_norm": 1.43586372459928, "learning_rate": 3.6433112846070333e-07, "loss": 0.4619, "step": 3769 }, { "epoch": 0.6082607292675056, "grad_norm": 1.090122583740692, "learning_rate": 3.6407437695533754e-07, "loss": 0.3647, "step": 3770 }, { "epoch": 0.6084220716360116, "grad_norm": 1.4564473718314277, "learning_rate": 3.638176641459173e-07, "loss": 0.608, "step": 3771 }, { "epoch": 0.6085834140045175, "grad_norm": 2.4269351395536596, "learning_rate": 3.635609901055251e-07, "loss": 0.4358, "step": 3772 }, { "epoch": 0.6087447563730236, "grad_norm": 1.832709993974041, "learning_rate": 3.633043549072319e-07, "loss": 0.3851, "step": 3773 }, { "epoch": 0.6089060987415296, "grad_norm": 1.5589926455399512, "learning_rate": 3.6304775862409793e-07, "loss": 0.4612, "step": 3774 }, { "epoch": 0.6090674411100355, "grad_norm": 1.108075199804707, "learning_rate": 3.627912013291721e-07, "loss": 0.4543, "step": 3775 }, { "epoch": 0.6092287834785415, "grad_norm": 1.606565300538895, "learning_rate": 3.625346830954924e-07, "loss": 0.4305, "step": 3776 }, { "epoch": 0.6093901258470474, "grad_norm": 1.439589598771339, "learning_rate": 3.622782039960858e-07, "loss": 0.3779, "step": 3777 }, { "epoch": 0.6095514682155534, "grad_norm": 1.9186334953599689, "learning_rate": 3.620217641039676e-07, "loss": 0.6145, "step": 3778 }, { "epoch": 0.6097128105840594, "grad_norm": 2.2177977331754852, "learning_rate": 3.6176536349214277e-07, "loss": 0.6006, "step": 3779 }, { "epoch": 0.6098741529525653, "grad_norm": 1.3378874173091109, "learning_rate": 3.6150900223360437e-07, "loss": 0.2965, "step": 3780 }, { "epoch": 0.6100354953210713, "grad_norm": 1.4458498394085313, "learning_rate": 3.612526804013346e-07, "loss": 0.4034, "step": 3781 }, { "epoch": 0.6101968376895773, "grad_norm": 1.7878862537199744, "learning_rate": 3.609963980683045e-07, "loss": 0.2875, "step": 3782 }, { "epoch": 0.6103581800580833, "grad_norm": 2.0375519634711687, "learning_rate": 3.6074015530747346e-07, "loss": 0.6115, "step": 3783 }, { "epoch": 0.6105195224265892, "grad_norm": 1.6033446282341195, "learning_rate": 3.6048395219179e-07, "loss": 0.5427, "step": 3784 }, { "epoch": 0.6106808647950952, "grad_norm": 1.234973424401623, "learning_rate": 3.602277887941913e-07, "loss": 0.5094, "step": 3785 }, { "epoch": 0.6108422071636012, "grad_norm": 1.9617150577463325, "learning_rate": 3.5997166518760315e-07, "loss": 0.4835, "step": 3786 }, { "epoch": 0.6110035495321071, "grad_norm": 1.686274966881521, "learning_rate": 3.5971558144493985e-07, "loss": 0.425, "step": 3787 }, { "epoch": 0.6111648919006131, "grad_norm": 1.653650511086724, "learning_rate": 3.5945953763910465e-07, "loss": 0.4191, "step": 3788 }, { "epoch": 0.611326234269119, "grad_norm": 1.372832323569891, "learning_rate": 3.5920353384298926e-07, "loss": 0.6197, "step": 3789 }, { "epoch": 0.611487576637625, "grad_norm": 1.8457553109355205, "learning_rate": 3.5894757012947406e-07, "loss": 0.6131, "step": 3790 }, { "epoch": 0.611648919006131, "grad_norm": 1.880869452428708, "learning_rate": 3.5869164657142815e-07, "loss": 0.604, "step": 3791 }, { "epoch": 0.611810261374637, "grad_norm": 2.128948134264015, "learning_rate": 3.5843576324170885e-07, "loss": 0.4838, "step": 3792 }, { "epoch": 0.611971603743143, "grad_norm": 1.4559350684170826, "learning_rate": 3.581799202131623e-07, "loss": 0.4572, "step": 3793 }, { "epoch": 0.6121329461116489, "grad_norm": 1.8635347141955696, "learning_rate": 3.5792411755862323e-07, "loss": 0.5032, "step": 3794 }, { "epoch": 0.6122942884801549, "grad_norm": 1.7905326033848643, "learning_rate": 3.5766835535091454e-07, "loss": 0.4018, "step": 3795 }, { "epoch": 0.6124556308486608, "grad_norm": 1.5447973304828195, "learning_rate": 3.574126336628479e-07, "loss": 0.4108, "step": 3796 }, { "epoch": 0.6126169732171668, "grad_norm": 1.773001663495535, "learning_rate": 3.5715695256722335e-07, "loss": 0.5472, "step": 3797 }, { "epoch": 0.6127783155856728, "grad_norm": 1.5629181874463662, "learning_rate": 3.569013121368294e-07, "loss": 0.5205, "step": 3798 }, { "epoch": 0.6129396579541788, "grad_norm": 1.6824986553895291, "learning_rate": 3.5664571244444297e-07, "loss": 0.5511, "step": 3799 }, { "epoch": 0.6131010003226848, "grad_norm": 1.7264422896986837, "learning_rate": 3.563901535628294e-07, "loss": 0.3985, "step": 3800 }, { "epoch": 0.6132623426911907, "grad_norm": 1.4482726430230783, "learning_rate": 3.561346355647422e-07, "loss": 0.5129, "step": 3801 }, { "epoch": 0.6134236850596967, "grad_norm": 1.5177338077219915, "learning_rate": 3.5587915852292383e-07, "loss": 0.468, "step": 3802 }, { "epoch": 0.6135850274282026, "grad_norm": 1.6271576128948104, "learning_rate": 3.556237225101043e-07, "loss": 0.4959, "step": 3803 }, { "epoch": 0.6137463697967086, "grad_norm": 1.5126243067330705, "learning_rate": 3.553683275990026e-07, "loss": 0.5123, "step": 3804 }, { "epoch": 0.6139077121652146, "grad_norm": 1.2287179752961446, "learning_rate": 3.5511297386232575e-07, "loss": 0.4418, "step": 3805 }, { "epoch": 0.6140690545337205, "grad_norm": 1.82848445912225, "learning_rate": 3.548576613727689e-07, "loss": 0.6875, "step": 3806 }, { "epoch": 0.6142303969022265, "grad_norm": 1.3927797489881841, "learning_rate": 3.546023902030156e-07, "loss": 0.3113, "step": 3807 }, { "epoch": 0.6143917392707325, "grad_norm": 1.2936228685065663, "learning_rate": 3.543471604257378e-07, "loss": 0.432, "step": 3808 }, { "epoch": 0.6145530816392385, "grad_norm": 1.9642726910147017, "learning_rate": 3.5409197211359545e-07, "loss": 0.6214, "step": 3809 }, { "epoch": 0.6147144240077445, "grad_norm": 1.6098629850526722, "learning_rate": 3.5383682533923666e-07, "loss": 0.4993, "step": 3810 }, { "epoch": 0.6148757663762504, "grad_norm": 1.7475668477164326, "learning_rate": 3.53581720175298e-07, "loss": 0.4185, "step": 3811 }, { "epoch": 0.6150371087447564, "grad_norm": 1.5986489940050397, "learning_rate": 3.5332665669440364e-07, "loss": 0.5406, "step": 3812 }, { "epoch": 0.6151984511132623, "grad_norm": 1.5568710405505355, "learning_rate": 3.530716349691668e-07, "loss": 0.3526, "step": 3813 }, { "epoch": 0.6153597934817683, "grad_norm": 1.3038880417216996, "learning_rate": 3.52816655072188e-07, "loss": 0.4866, "step": 3814 }, { "epoch": 0.6155211358502742, "grad_norm": 1.263577488671599, "learning_rate": 3.525617170760561e-07, "loss": 0.4808, "step": 3815 }, { "epoch": 0.6156824782187803, "grad_norm": 1.4345157994663067, "learning_rate": 3.5230682105334785e-07, "loss": 0.4841, "step": 3816 }, { "epoch": 0.6158438205872863, "grad_norm": 1.8524785089726261, "learning_rate": 3.5205196707662866e-07, "loss": 0.4118, "step": 3817 }, { "epoch": 0.6160051629557922, "grad_norm": 1.6047308123871147, "learning_rate": 3.5179715521845124e-07, "loss": 0.5265, "step": 3818 }, { "epoch": 0.6161665053242982, "grad_norm": 1.4482470439641686, "learning_rate": 3.5154238555135683e-07, "loss": 0.3824, "step": 3819 }, { "epoch": 0.6163278476928041, "grad_norm": 2.257173018659254, "learning_rate": 3.512876581478742e-07, "loss": 0.6771, "step": 3820 }, { "epoch": 0.6164891900613101, "grad_norm": 1.106848243549382, "learning_rate": 3.510329730805206e-07, "loss": 0.5287, "step": 3821 }, { "epoch": 0.616650532429816, "grad_norm": 1.5260662385006039, "learning_rate": 3.507783304218009e-07, "loss": 0.3449, "step": 3822 }, { "epoch": 0.616811874798322, "grad_norm": 1.542814222879203, "learning_rate": 3.505237302442079e-07, "loss": 0.319, "step": 3823 }, { "epoch": 0.616973217166828, "grad_norm": 1.982332515111925, "learning_rate": 3.502691726202224e-07, "loss": 0.3567, "step": 3824 }, { "epoch": 0.617134559535334, "grad_norm": 1.5053391801221057, "learning_rate": 3.5001465762231304e-07, "loss": 0.4691, "step": 3825 }, { "epoch": 0.61729590190384, "grad_norm": 1.462936013590974, "learning_rate": 3.497601853229365e-07, "loss": 0.4459, "step": 3826 }, { "epoch": 0.6174572442723459, "grad_norm": 1.6623664773656168, "learning_rate": 3.495057557945371e-07, "loss": 0.4132, "step": 3827 }, { "epoch": 0.6176185866408519, "grad_norm": 1.9181249364590005, "learning_rate": 3.4925136910954695e-07, "loss": 0.488, "step": 3828 }, { "epoch": 0.6177799290093579, "grad_norm": 1.6744970990096324, "learning_rate": 3.4899702534038593e-07, "loss": 0.6198, "step": 3829 }, { "epoch": 0.6179412713778638, "grad_norm": 1.3714278811143659, "learning_rate": 3.4874272455946216e-07, "loss": 0.3528, "step": 3830 }, { "epoch": 0.6181026137463698, "grad_norm": 1.4539913753164524, "learning_rate": 3.484884668391709e-07, "loss": 0.351, "step": 3831 }, { "epoch": 0.6182639561148757, "grad_norm": 1.0922458114665952, "learning_rate": 3.4823425225189564e-07, "loss": 0.3453, "step": 3832 }, { "epoch": 0.6184252984833818, "grad_norm": 2.4200628576706236, "learning_rate": 3.47980080870007e-07, "loss": 0.5767, "step": 3833 }, { "epoch": 0.6185866408518877, "grad_norm": 1.870929686933177, "learning_rate": 3.4772595276586415e-07, "loss": 0.5649, "step": 3834 }, { "epoch": 0.6187479832203937, "grad_norm": 1.301987051900888, "learning_rate": 3.4747186801181307e-07, "loss": 0.3902, "step": 3835 }, { "epoch": 0.6189093255888997, "grad_norm": 1.5515473825077422, "learning_rate": 3.472178266801882e-07, "loss": 0.395, "step": 3836 }, { "epoch": 0.6190706679574056, "grad_norm": 2.4700739726677092, "learning_rate": 3.469638288433111e-07, "loss": 0.4903, "step": 3837 }, { "epoch": 0.6192320103259116, "grad_norm": 1.9130994592785602, "learning_rate": 3.4670987457349084e-07, "loss": 0.5517, "step": 3838 }, { "epoch": 0.6193933526944175, "grad_norm": 1.307702014043438, "learning_rate": 3.4645596394302456e-07, "loss": 0.6291, "step": 3839 }, { "epoch": 0.6195546950629235, "grad_norm": 1.5312784931879475, "learning_rate": 3.4620209702419666e-07, "loss": 0.4975, "step": 3840 }, { "epoch": 0.6197160374314294, "grad_norm": 1.8603544460484542, "learning_rate": 3.4594827388927917e-07, "loss": 0.624, "step": 3841 }, { "epoch": 0.6198773797999355, "grad_norm": 1.5261737996292575, "learning_rate": 3.456944946105315e-07, "loss": 0.5654, "step": 3842 }, { "epoch": 0.6200387221684415, "grad_norm": 1.4784538629046051, "learning_rate": 3.45440759260201e-07, "loss": 0.5124, "step": 3843 }, { "epoch": 0.6202000645369474, "grad_norm": 1.4012591064456679, "learning_rate": 3.451870679105222e-07, "loss": 0.4084, "step": 3844 }, { "epoch": 0.6203614069054534, "grad_norm": 1.7099411384312075, "learning_rate": 3.449334206337169e-07, "loss": 0.4571, "step": 3845 }, { "epoch": 0.6205227492739593, "grad_norm": 1.9259492441067194, "learning_rate": 3.446798175019948e-07, "loss": 0.7193, "step": 3846 }, { "epoch": 0.6206840916424653, "grad_norm": 1.825422980007297, "learning_rate": 3.444262585875526e-07, "loss": 0.5103, "step": 3847 }, { "epoch": 0.6208454340109713, "grad_norm": 1.4250099081397676, "learning_rate": 3.44172743962575e-07, "loss": 0.465, "step": 3848 }, { "epoch": 0.6210067763794772, "grad_norm": 1.2337455593288165, "learning_rate": 3.439192736992337e-07, "loss": 0.4219, "step": 3849 }, { "epoch": 0.6211681187479832, "grad_norm": 1.5018902630294322, "learning_rate": 3.4366584786968756e-07, "loss": 0.489, "step": 3850 }, { "epoch": 0.6213294611164892, "grad_norm": 2.009391549666671, "learning_rate": 3.434124665460831e-07, "loss": 0.6102, "step": 3851 }, { "epoch": 0.6214908034849952, "grad_norm": 2.0112927624677073, "learning_rate": 3.4315912980055426e-07, "loss": 0.6037, "step": 3852 }, { "epoch": 0.6216521458535011, "grad_norm": 1.8526407313463016, "learning_rate": 3.429058377052221e-07, "loss": 0.5838, "step": 3853 }, { "epoch": 0.6218134882220071, "grad_norm": 1.774800117413747, "learning_rate": 3.426525903321948e-07, "loss": 0.6431, "step": 3854 }, { "epoch": 0.6219748305905131, "grad_norm": 1.2380818588543503, "learning_rate": 3.4239938775356815e-07, "loss": 0.3425, "step": 3855 }, { "epoch": 0.622136172959019, "grad_norm": 1.2231750362650642, "learning_rate": 3.4214623004142516e-07, "loss": 0.2404, "step": 3856 }, { "epoch": 0.622297515327525, "grad_norm": 1.5945007949305574, "learning_rate": 3.4189311726783587e-07, "loss": 0.4167, "step": 3857 }, { "epoch": 0.6224588576960309, "grad_norm": 1.4054779476524277, "learning_rate": 3.4164004950485736e-07, "loss": 0.4344, "step": 3858 }, { "epoch": 0.622620200064537, "grad_norm": 1.4912691336057402, "learning_rate": 3.4138702682453456e-07, "loss": 0.4757, "step": 3859 }, { "epoch": 0.622781542433043, "grad_norm": 1.5715001867126102, "learning_rate": 3.41134049298899e-07, "loss": 0.5831, "step": 3860 }, { "epoch": 0.6229428848015489, "grad_norm": 1.5137120395376915, "learning_rate": 3.408811169999695e-07, "loss": 0.4501, "step": 3861 }, { "epoch": 0.6231042271700549, "grad_norm": 1.5210645926590014, "learning_rate": 3.4062822999975207e-07, "loss": 0.5342, "step": 3862 }, { "epoch": 0.6232655695385608, "grad_norm": 1.57471210331063, "learning_rate": 3.403753883702397e-07, "loss": 0.58, "step": 3863 }, { "epoch": 0.6234269119070668, "grad_norm": 1.3808597634839668, "learning_rate": 3.401225921834124e-07, "loss": 0.4449, "step": 3864 }, { "epoch": 0.6235882542755727, "grad_norm": 1.478214369283974, "learning_rate": 3.398698415112378e-07, "loss": 0.366, "step": 3865 }, { "epoch": 0.6237495966440787, "grad_norm": 1.6113782840204365, "learning_rate": 3.396171364256697e-07, "loss": 0.4777, "step": 3866 }, { "epoch": 0.6239109390125847, "grad_norm": 1.978480136323347, "learning_rate": 3.393644769986496e-07, "loss": 0.6376, "step": 3867 }, { "epoch": 0.6240722813810907, "grad_norm": 1.6580978737577468, "learning_rate": 3.3911186330210564e-07, "loss": 0.6064, "step": 3868 }, { "epoch": 0.6242336237495967, "grad_norm": 2.189920230962981, "learning_rate": 3.388592954079533e-07, "loss": 0.5021, "step": 3869 }, { "epoch": 0.6243949661181026, "grad_norm": 1.4276524636376116, "learning_rate": 3.386067733880944e-07, "loss": 0.5561, "step": 3870 }, { "epoch": 0.6245563084866086, "grad_norm": 1.3332096827312487, "learning_rate": 3.3835429731441856e-07, "loss": 0.4886, "step": 3871 }, { "epoch": 0.6247176508551145, "grad_norm": 1.3098339068700373, "learning_rate": 3.3810186725880164e-07, "loss": 0.5789, "step": 3872 }, { "epoch": 0.6248789932236205, "grad_norm": 1.5239429124567778, "learning_rate": 3.378494832931066e-07, "loss": 0.5697, "step": 3873 }, { "epoch": 0.6250403355921265, "grad_norm": 1.8845035667274346, "learning_rate": 3.375971454891833e-07, "loss": 0.4679, "step": 3874 }, { "epoch": 0.6252016779606324, "grad_norm": 1.539090074011398, "learning_rate": 3.373448539188686e-07, "loss": 0.4822, "step": 3875 }, { "epoch": 0.6253630203291385, "grad_norm": 1.5070083133895997, "learning_rate": 3.370926086539858e-07, "loss": 0.3612, "step": 3876 }, { "epoch": 0.6255243626976444, "grad_norm": 1.5646138197988635, "learning_rate": 3.368404097663454e-07, "loss": 0.4284, "step": 3877 }, { "epoch": 0.6256857050661504, "grad_norm": 1.7152616995170737, "learning_rate": 3.365882573277445e-07, "loss": 0.4487, "step": 3878 }, { "epoch": 0.6258470474346564, "grad_norm": 1.5140926047088914, "learning_rate": 3.363361514099673e-07, "loss": 0.5576, "step": 3879 }, { "epoch": 0.6260083898031623, "grad_norm": 1.4587798479519278, "learning_rate": 3.360840920847842e-07, "loss": 0.4578, "step": 3880 }, { "epoch": 0.6261697321716683, "grad_norm": 1.7051546794052532, "learning_rate": 3.358320794239525e-07, "loss": 0.4811, "step": 3881 }, { "epoch": 0.6263310745401742, "grad_norm": 1.5570403271538282, "learning_rate": 3.355801134992169e-07, "loss": 0.415, "step": 3882 }, { "epoch": 0.6264924169086802, "grad_norm": 1.194011403967041, "learning_rate": 3.3532819438230767e-07, "loss": 0.4215, "step": 3883 }, { "epoch": 0.6266537592771861, "grad_norm": 1.9341077333363856, "learning_rate": 3.3507632214494275e-07, "loss": 0.4208, "step": 3884 }, { "epoch": 0.6268151016456922, "grad_norm": 1.7610132534518323, "learning_rate": 3.3482449685882617e-07, "loss": 0.5065, "step": 3885 }, { "epoch": 0.6269764440141982, "grad_norm": 1.6914539638775612, "learning_rate": 3.3457271859564863e-07, "loss": 0.4449, "step": 3886 }, { "epoch": 0.6271377863827041, "grad_norm": 1.2577366776447958, "learning_rate": 3.343209874270877e-07, "loss": 0.3091, "step": 3887 }, { "epoch": 0.6272991287512101, "grad_norm": 1.2799389807491697, "learning_rate": 3.3406930342480734e-07, "loss": 0.3734, "step": 3888 }, { "epoch": 0.627460471119716, "grad_norm": 2.166779282895448, "learning_rate": 3.338176666604581e-07, "loss": 0.5417, "step": 3889 }, { "epoch": 0.627621813488222, "grad_norm": 2.187965452538656, "learning_rate": 3.3356607720567707e-07, "loss": 0.5298, "step": 3890 }, { "epoch": 0.627783155856728, "grad_norm": 1.908274529174374, "learning_rate": 3.333145351320881e-07, "loss": 0.5962, "step": 3891 }, { "epoch": 0.6279444982252339, "grad_norm": 1.4084758097718049, "learning_rate": 3.330630405113013e-07, "loss": 0.41, "step": 3892 }, { "epoch": 0.6281058405937399, "grad_norm": 1.8631474668830326, "learning_rate": 3.3281159341491304e-07, "loss": 0.3305, "step": 3893 }, { "epoch": 0.6282671829622459, "grad_norm": 1.7284541749537825, "learning_rate": 3.325601939145069e-07, "loss": 0.5903, "step": 3894 }, { "epoch": 0.6284285253307519, "grad_norm": 1.5740273030299041, "learning_rate": 3.323088420816523e-07, "loss": 0.4813, "step": 3895 }, { "epoch": 0.6285898676992578, "grad_norm": 3.7192482494307306, "learning_rate": 3.3205753798790526e-07, "loss": 0.5887, "step": 3896 }, { "epoch": 0.6287512100677638, "grad_norm": 2.103402959131675, "learning_rate": 3.3180628170480824e-07, "loss": 0.6099, "step": 3897 }, { "epoch": 0.6289125524362698, "grad_norm": 2.0331854182781455, "learning_rate": 3.3155507330388996e-07, "loss": 0.6178, "step": 3898 }, { "epoch": 0.6290738948047757, "grad_norm": 1.7503183620240979, "learning_rate": 3.313039128566657e-07, "loss": 0.5251, "step": 3899 }, { "epoch": 0.6292352371732817, "grad_norm": 1.8277564696191058, "learning_rate": 3.31052800434637e-07, "loss": 0.486, "step": 3900 }, { "epoch": 0.6293965795417876, "grad_norm": 1.2821442809364558, "learning_rate": 3.308017361092918e-07, "loss": 0.5745, "step": 3901 }, { "epoch": 0.6295579219102937, "grad_norm": 1.8065115557780602, "learning_rate": 3.305507199521041e-07, "loss": 0.6048, "step": 3902 }, { "epoch": 0.6297192642787997, "grad_norm": 1.6010415323125435, "learning_rate": 3.3029975203453437e-07, "loss": 0.4606, "step": 3903 }, { "epoch": 0.6298806066473056, "grad_norm": 1.5301050070338469, "learning_rate": 3.300488324280294e-07, "loss": 0.4488, "step": 3904 }, { "epoch": 0.6300419490158116, "grad_norm": 2.352080532057665, "learning_rate": 3.2979796120402214e-07, "loss": 0.3375, "step": 3905 }, { "epoch": 0.6302032913843175, "grad_norm": 1.4826959851617165, "learning_rate": 3.295471384339319e-07, "loss": 0.4798, "step": 3906 }, { "epoch": 0.6303646337528235, "grad_norm": 1.7740601106284553, "learning_rate": 3.2929636418916407e-07, "loss": 0.6111, "step": 3907 }, { "epoch": 0.6305259761213294, "grad_norm": 1.4314176169782027, "learning_rate": 3.290456385411101e-07, "loss": 0.4638, "step": 3908 }, { "epoch": 0.6306873184898354, "grad_norm": 1.5808661043868926, "learning_rate": 3.2879496156114775e-07, "loss": 0.5591, "step": 3909 }, { "epoch": 0.6308486608583413, "grad_norm": 1.2983446641181906, "learning_rate": 3.2854433332064114e-07, "loss": 0.56, "step": 3910 }, { "epoch": 0.6310100032268474, "grad_norm": 1.9938242453021744, "learning_rate": 3.282937538909401e-07, "loss": 0.59, "step": 3911 }, { "epoch": 0.6311713455953534, "grad_norm": 2.688465211222858, "learning_rate": 3.2804322334338076e-07, "loss": 0.4295, "step": 3912 }, { "epoch": 0.6313326879638593, "grad_norm": 1.2768348823477882, "learning_rate": 3.277927417492854e-07, "loss": 0.4735, "step": 3913 }, { "epoch": 0.6314940303323653, "grad_norm": 2.0531969356853628, "learning_rate": 3.275423091799624e-07, "loss": 0.6806, "step": 3914 }, { "epoch": 0.6316553727008712, "grad_norm": 1.2024720141510943, "learning_rate": 3.2729192570670586e-07, "loss": 0.4056, "step": 3915 }, { "epoch": 0.6318167150693772, "grad_norm": 1.0954213587584378, "learning_rate": 3.270415914007961e-07, "loss": 0.5278, "step": 3916 }, { "epoch": 0.6319780574378832, "grad_norm": 1.825169317673703, "learning_rate": 3.267913063334997e-07, "loss": 0.5677, "step": 3917 }, { "epoch": 0.6321393998063891, "grad_norm": 1.164923739741032, "learning_rate": 3.265410705760689e-07, "loss": 0.3759, "step": 3918 }, { "epoch": 0.6323007421748952, "grad_norm": 1.8830427606744469, "learning_rate": 3.2629088419974204e-07, "loss": 0.6336, "step": 3919 }, { "epoch": 0.6324620845434011, "grad_norm": 1.4724059360167903, "learning_rate": 3.2604074727574325e-07, "loss": 0.5742, "step": 3920 }, { "epoch": 0.6326234269119071, "grad_norm": 1.7587920447816192, "learning_rate": 3.257906598752828e-07, "loss": 0.4901, "step": 3921 }, { "epoch": 0.632784769280413, "grad_norm": 1.658286084727255, "learning_rate": 3.255406220695565e-07, "loss": 0.5063, "step": 3922 }, { "epoch": 0.632946111648919, "grad_norm": 1.6136780036213352, "learning_rate": 3.2529063392974666e-07, "loss": 0.5135, "step": 3923 }, { "epoch": 0.633107454017425, "grad_norm": 1.7419413576602463, "learning_rate": 3.2504069552702087e-07, "loss": 0.582, "step": 3924 }, { "epoch": 0.6332687963859309, "grad_norm": 1.4315082233817658, "learning_rate": 3.247908069325327e-07, "loss": 0.5078, "step": 3925 }, { "epoch": 0.6334301387544369, "grad_norm": 1.8861673311454428, "learning_rate": 3.2454096821742164e-07, "loss": 0.4519, "step": 3926 }, { "epoch": 0.6335914811229428, "grad_norm": 1.1923296242488182, "learning_rate": 3.2429117945281295e-07, "loss": 0.4857, "step": 3927 }, { "epoch": 0.6337528234914489, "grad_norm": 1.652962210415374, "learning_rate": 3.2404144070981786e-07, "loss": 0.608, "step": 3928 }, { "epoch": 0.6339141658599549, "grad_norm": 1.4774908951738603, "learning_rate": 3.237917520595331e-07, "loss": 0.5135, "step": 3929 }, { "epoch": 0.6340755082284608, "grad_norm": 1.7905367311887472, "learning_rate": 3.235421135730411e-07, "loss": 0.4163, "step": 3930 }, { "epoch": 0.6342368505969668, "grad_norm": 1.1771995127009045, "learning_rate": 3.232925253214101e-07, "loss": 0.5064, "step": 3931 }, { "epoch": 0.6343981929654727, "grad_norm": 1.4256883564826432, "learning_rate": 3.230429873756942e-07, "loss": 0.4862, "step": 3932 }, { "epoch": 0.6345595353339787, "grad_norm": 1.3611495613756879, "learning_rate": 3.2279349980693296e-07, "loss": 0.3043, "step": 3933 }, { "epoch": 0.6347208777024846, "grad_norm": 2.4468536882702567, "learning_rate": 3.225440626861517e-07, "loss": 0.6909, "step": 3934 }, { "epoch": 0.6348822200709906, "grad_norm": 1.33902352809391, "learning_rate": 3.2229467608436123e-07, "loss": 0.473, "step": 3935 }, { "epoch": 0.6350435624394967, "grad_norm": 1.8329738640220008, "learning_rate": 3.2204534007255826e-07, "loss": 0.448, "step": 3936 }, { "epoch": 0.6352049048080026, "grad_norm": 1.607440192858681, "learning_rate": 3.2179605472172487e-07, "loss": 0.4751, "step": 3937 }, { "epoch": 0.6353662471765086, "grad_norm": 1.5006982449756996, "learning_rate": 3.215468201028288e-07, "loss": 0.3664, "step": 3938 }, { "epoch": 0.6355275895450145, "grad_norm": 1.3768936901927877, "learning_rate": 3.2129763628682314e-07, "loss": 0.4744, "step": 3939 }, { "epoch": 0.6356889319135205, "grad_norm": 1.766087901782326, "learning_rate": 3.2104850334464696e-07, "loss": 0.5449, "step": 3940 }, { "epoch": 0.6358502742820265, "grad_norm": 1.2843798579288879, "learning_rate": 3.207994213472247e-07, "loss": 0.4624, "step": 3941 }, { "epoch": 0.6360116166505324, "grad_norm": 1.3533391135582467, "learning_rate": 3.2055039036546595e-07, "loss": 0.3687, "step": 3942 }, { "epoch": 0.6361729590190384, "grad_norm": 1.3771760800630186, "learning_rate": 3.2030141047026614e-07, "loss": 0.5312, "step": 3943 }, { "epoch": 0.6363343013875443, "grad_norm": 1.8919793944577552, "learning_rate": 3.200524817325059e-07, "loss": 0.5282, "step": 3944 }, { "epoch": 0.6364956437560504, "grad_norm": 1.6441307758191273, "learning_rate": 3.1980360422305166e-07, "loss": 0.5662, "step": 3945 }, { "epoch": 0.6366569861245563, "grad_norm": 1.6489296851819346, "learning_rate": 3.195547780127549e-07, "loss": 0.4818, "step": 3946 }, { "epoch": 0.6368183284930623, "grad_norm": 1.335667052974761, "learning_rate": 3.193060031724526e-07, "loss": 0.4477, "step": 3947 }, { "epoch": 0.6369796708615683, "grad_norm": 1.6879007075652417, "learning_rate": 3.190572797729671e-07, "loss": 0.4007, "step": 3948 }, { "epoch": 0.6371410132300742, "grad_norm": 1.234026461297452, "learning_rate": 3.1880860788510635e-07, "loss": 0.4011, "step": 3949 }, { "epoch": 0.6373023555985802, "grad_norm": 1.7229979991358797, "learning_rate": 3.185599875796632e-07, "loss": 0.5147, "step": 3950 }, { "epoch": 0.6374636979670861, "grad_norm": 1.2864054721777567, "learning_rate": 3.183114189274163e-07, "loss": 0.4712, "step": 3951 }, { "epoch": 0.6376250403355921, "grad_norm": 2.0157284968810725, "learning_rate": 3.180629019991292e-07, "loss": 0.5815, "step": 3952 }, { "epoch": 0.637786382704098, "grad_norm": 1.2706175396836745, "learning_rate": 3.178144368655509e-07, "loss": 0.3639, "step": 3953 }, { "epoch": 0.6379477250726041, "grad_norm": 1.511504244309756, "learning_rate": 3.175660235974156e-07, "loss": 0.5164, "step": 3954 }, { "epoch": 0.6381090674411101, "grad_norm": 2.027003029947599, "learning_rate": 3.173176622654428e-07, "loss": 0.4481, "step": 3955 }, { "epoch": 0.638270409809616, "grad_norm": 1.8465845980236273, "learning_rate": 3.170693529403371e-07, "loss": 0.5323, "step": 3956 }, { "epoch": 0.638431752178122, "grad_norm": 1.6006593716471185, "learning_rate": 3.1682109569278836e-07, "loss": 0.3811, "step": 3957 }, { "epoch": 0.6385930945466279, "grad_norm": 1.8920648309838635, "learning_rate": 3.165728905934718e-07, "loss": 0.6428, "step": 3958 }, { "epoch": 0.6387544369151339, "grad_norm": 1.845515165900725, "learning_rate": 3.1632473771304747e-07, "loss": 0.5666, "step": 3959 }, { "epoch": 0.6389157792836399, "grad_norm": 1.6472714173231173, "learning_rate": 3.1607663712216067e-07, "loss": 0.5884, "step": 3960 }, { "epoch": 0.6390771216521458, "grad_norm": 1.9238262040956464, "learning_rate": 3.158285888914418e-07, "loss": 0.5453, "step": 3961 }, { "epoch": 0.6392384640206519, "grad_norm": 1.604986410707361, "learning_rate": 3.155805930915064e-07, "loss": 0.6265, "step": 3962 }, { "epoch": 0.6393998063891578, "grad_norm": 1.3879347936171238, "learning_rate": 3.1533264979295527e-07, "loss": 0.5481, "step": 3963 }, { "epoch": 0.6395611487576638, "grad_norm": 1.5944539179873098, "learning_rate": 3.150847590663741e-07, "loss": 0.4774, "step": 3964 }, { "epoch": 0.6397224911261697, "grad_norm": 1.4805946799797294, "learning_rate": 3.148369209823333e-07, "loss": 0.5259, "step": 3965 }, { "epoch": 0.6398838334946757, "grad_norm": 1.323240655989658, "learning_rate": 3.145891356113888e-07, "loss": 0.5958, "step": 3966 }, { "epoch": 0.6400451758631817, "grad_norm": 1.3192183797262653, "learning_rate": 3.1434140302408125e-07, "loss": 0.4221, "step": 3967 }, { "epoch": 0.6402065182316876, "grad_norm": 1.4623079450960026, "learning_rate": 3.140937232909363e-07, "loss": 0.4393, "step": 3968 }, { "epoch": 0.6403678606001936, "grad_norm": 1.4513383772404196, "learning_rate": 3.138460964824646e-07, "loss": 0.4456, "step": 3969 }, { "epoch": 0.6405292029686995, "grad_norm": 1.8261374730424402, "learning_rate": 3.135985226691617e-07, "loss": 0.5049, "step": 3970 }, { "epoch": 0.6406905453372056, "grad_norm": 1.4738597859394968, "learning_rate": 3.133510019215082e-07, "loss": 0.4199, "step": 3971 }, { "epoch": 0.6408518877057116, "grad_norm": 1.5901327784296817, "learning_rate": 3.131035343099693e-07, "loss": 0.6032, "step": 3972 }, { "epoch": 0.6410132300742175, "grad_norm": 1.4918647936442695, "learning_rate": 3.1285611990499506e-07, "loss": 0.4219, "step": 3973 }, { "epoch": 0.6411745724427235, "grad_norm": 2.1838454099795435, "learning_rate": 3.126087587770211e-07, "loss": 0.6757, "step": 3974 }, { "epoch": 0.6413359148112294, "grad_norm": 2.031334273717546, "learning_rate": 3.12361450996467e-07, "loss": 0.579, "step": 3975 }, { "epoch": 0.6414972571797354, "grad_norm": 1.955378826541201, "learning_rate": 3.1211419663373754e-07, "loss": 0.4266, "step": 3976 }, { "epoch": 0.6416585995482413, "grad_norm": 1.6237577678679624, "learning_rate": 3.1186699575922237e-07, "loss": 0.4419, "step": 3977 }, { "epoch": 0.6418199419167473, "grad_norm": 1.4342949838697003, "learning_rate": 3.1161984844329576e-07, "loss": 0.3221, "step": 3978 }, { "epoch": 0.6419812842852534, "grad_norm": 1.86126055599829, "learning_rate": 3.1137275475631656e-07, "loss": 0.4845, "step": 3979 }, { "epoch": 0.6421426266537593, "grad_norm": 1.5944333574896852, "learning_rate": 3.111257147686289e-07, "loss": 0.4958, "step": 3980 }, { "epoch": 0.6423039690222653, "grad_norm": 1.5418416602678513, "learning_rate": 3.1087872855056107e-07, "loss": 0.3091, "step": 3981 }, { "epoch": 0.6424653113907712, "grad_norm": 1.4452978184959562, "learning_rate": 3.106317961724263e-07, "loss": 0.4785, "step": 3982 }, { "epoch": 0.6426266537592772, "grad_norm": 1.4279268189866978, "learning_rate": 3.1038491770452246e-07, "loss": 0.5783, "step": 3983 }, { "epoch": 0.6427879961277831, "grad_norm": 1.5524608059172706, "learning_rate": 3.101380932171321e-07, "loss": 0.5959, "step": 3984 }, { "epoch": 0.6429493384962891, "grad_norm": 1.5502276068898566, "learning_rate": 3.0989132278052223e-07, "loss": 0.5274, "step": 3985 }, { "epoch": 0.6431106808647951, "grad_norm": 1.5646072673702458, "learning_rate": 3.0964460646494485e-07, "loss": 0.4521, "step": 3986 }, { "epoch": 0.643272023233301, "grad_norm": 1.9727132958954614, "learning_rate": 3.093979443406363e-07, "loss": 0.6423, "step": 3987 }, { "epoch": 0.6434333656018071, "grad_norm": 1.6543231047765605, "learning_rate": 3.0915133647781744e-07, "loss": 0.5534, "step": 3988 }, { "epoch": 0.643594707970313, "grad_norm": 1.5011257874290376, "learning_rate": 3.0890478294669363e-07, "loss": 0.4914, "step": 3989 }, { "epoch": 0.643756050338819, "grad_norm": 2.010730448795567, "learning_rate": 3.086582838174551e-07, "loss": 0.6092, "step": 3990 }, { "epoch": 0.643917392707325, "grad_norm": 1.5854727529195336, "learning_rate": 3.0841183916027636e-07, "loss": 0.4132, "step": 3991 }, { "epoch": 0.6440787350758309, "grad_norm": 1.4496857335449298, "learning_rate": 3.081654490453162e-07, "loss": 0.4326, "step": 3992 }, { "epoch": 0.6442400774443369, "grad_norm": 1.2798782541818767, "learning_rate": 3.0791911354271845e-07, "loss": 0.4083, "step": 3993 }, { "epoch": 0.6444014198128428, "grad_norm": 1.7737651639885619, "learning_rate": 3.0767283272261085e-07, "loss": 0.5113, "step": 3994 }, { "epoch": 0.6445627621813488, "grad_norm": 1.4323529686958318, "learning_rate": 3.0742660665510583e-07, "loss": 0.4879, "step": 3995 }, { "epoch": 0.6447241045498547, "grad_norm": 1.390729921647557, "learning_rate": 3.071804354103e-07, "loss": 0.3632, "step": 3996 }, { "epoch": 0.6448854469183608, "grad_norm": 1.903568821161182, "learning_rate": 3.069343190582749e-07, "loss": 0.5271, "step": 3997 }, { "epoch": 0.6450467892868668, "grad_norm": 1.219536894077141, "learning_rate": 3.066882576690959e-07, "loss": 0.5166, "step": 3998 }, { "epoch": 0.6452081316553727, "grad_norm": 1.444068001738921, "learning_rate": 3.0644225131281297e-07, "loss": 0.3961, "step": 3999 }, { "epoch": 0.6453694740238787, "grad_norm": 1.7450470315229825, "learning_rate": 3.061963000594603e-07, "loss": 0.3845, "step": 4000 }, { "epoch": 0.6455308163923846, "grad_norm": 1.7426995863374803, "learning_rate": 3.059504039790565e-07, "loss": 0.5122, "step": 4001 }, { "epoch": 0.6456921587608906, "grad_norm": 1.6825589498137616, "learning_rate": 3.057045631416043e-07, "loss": 0.531, "step": 4002 }, { "epoch": 0.6458535011293965, "grad_norm": 1.3481537716005538, "learning_rate": 3.05458777617091e-07, "loss": 0.4564, "step": 4003 }, { "epoch": 0.6460148434979025, "grad_norm": 1.1720404953128531, "learning_rate": 3.0521304747548793e-07, "loss": 0.5864, "step": 4004 }, { "epoch": 0.6461761858664086, "grad_norm": 1.7188432148018367, "learning_rate": 3.0496737278675057e-07, "loss": 0.5738, "step": 4005 }, { "epoch": 0.6463375282349145, "grad_norm": 1.6403711031683101, "learning_rate": 3.047217536208189e-07, "loss": 0.4627, "step": 4006 }, { "epoch": 0.6464988706034205, "grad_norm": 1.6479192705612091, "learning_rate": 3.0447619004761694e-07, "loss": 0.5027, "step": 4007 }, { "epoch": 0.6466602129719264, "grad_norm": 1.7697480260517628, "learning_rate": 3.042306821370526e-07, "loss": 0.476, "step": 4008 }, { "epoch": 0.6468215553404324, "grad_norm": 1.7534068188781529, "learning_rate": 3.0398522995901866e-07, "loss": 0.5383, "step": 4009 }, { "epoch": 0.6469828977089384, "grad_norm": 1.6238496082932077, "learning_rate": 3.0373983358339137e-07, "loss": 0.5778, "step": 4010 }, { "epoch": 0.6471442400774443, "grad_norm": 1.9807791382026896, "learning_rate": 3.0349449308003134e-07, "loss": 0.5806, "step": 4011 }, { "epoch": 0.6473055824459503, "grad_norm": 1.7149693610343302, "learning_rate": 3.0324920851878333e-07, "loss": 0.5539, "step": 4012 }, { "epoch": 0.6474669248144562, "grad_norm": 1.3999005980625452, "learning_rate": 3.0300397996947604e-07, "loss": 0.5425, "step": 4013 }, { "epoch": 0.6476282671829623, "grad_norm": 1.23421477231569, "learning_rate": 3.0275880750192224e-07, "loss": 0.6623, "step": 4014 }, { "epoch": 0.6477896095514682, "grad_norm": 1.8507574077469804, "learning_rate": 3.025136911859187e-07, "loss": 0.567, "step": 4015 }, { "epoch": 0.6479509519199742, "grad_norm": 1.886609250004881, "learning_rate": 3.022686310912466e-07, "loss": 0.5149, "step": 4016 }, { "epoch": 0.6481122942884802, "grad_norm": 1.9695798547868844, "learning_rate": 3.0202362728767063e-07, "loss": 0.6095, "step": 4017 }, { "epoch": 0.6482736366569861, "grad_norm": 2.5224109368532104, "learning_rate": 3.017786798449394e-07, "loss": 0.3995, "step": 4018 }, { "epoch": 0.6484349790254921, "grad_norm": 1.8167468992358784, "learning_rate": 3.01533788832786e-07, "loss": 0.5644, "step": 4019 }, { "epoch": 0.648596321393998, "grad_norm": 1.523259705169979, "learning_rate": 3.012889543209271e-07, "loss": 0.467, "step": 4020 }, { "epoch": 0.648757663762504, "grad_norm": 1.774683711925668, "learning_rate": 3.010441763790634e-07, "loss": 0.5052, "step": 4021 }, { "epoch": 0.6489190061310101, "grad_norm": 1.8328551044374197, "learning_rate": 3.007994550768793e-07, "loss": 0.648, "step": 4022 }, { "epoch": 0.649080348499516, "grad_norm": 1.31621036147436, "learning_rate": 3.005547904840432e-07, "loss": 0.5951, "step": 4023 }, { "epoch": 0.649241690868022, "grad_norm": 2.208934833972819, "learning_rate": 3.003101826702074e-07, "loss": 0.6492, "step": 4024 }, { "epoch": 0.6494030332365279, "grad_norm": 1.461339884947502, "learning_rate": 3.0006563170500807e-07, "loss": 0.563, "step": 4025 }, { "epoch": 0.6495643756050339, "grad_norm": 1.903809282528248, "learning_rate": 2.998211376580652e-07, "loss": 0.5435, "step": 4026 }, { "epoch": 0.6497257179735398, "grad_norm": 1.2556164923418134, "learning_rate": 2.995767005989823e-07, "loss": 0.5005, "step": 4027 }, { "epoch": 0.6498870603420458, "grad_norm": 1.480152347945735, "learning_rate": 2.993323205973468e-07, "loss": 0.5408, "step": 4028 }, { "epoch": 0.6500484027105518, "grad_norm": 1.4715692773623612, "learning_rate": 2.990879977227303e-07, "loss": 0.36, "step": 4029 }, { "epoch": 0.6502097450790577, "grad_norm": 1.3774683211711543, "learning_rate": 2.988437320446875e-07, "loss": 0.5461, "step": 4030 }, { "epoch": 0.6503710874475638, "grad_norm": 1.3608599915533992, "learning_rate": 2.98599523632757e-07, "loss": 0.3821, "step": 4031 }, { "epoch": 0.6505324298160697, "grad_norm": 1.063275559050657, "learning_rate": 2.9835537255646166e-07, "loss": 0.3991, "step": 4032 }, { "epoch": 0.6506937721845757, "grad_norm": 1.4237931277313256, "learning_rate": 2.9811127888530713e-07, "loss": 0.4228, "step": 4033 }, { "epoch": 0.6508551145530816, "grad_norm": 1.8519739908983244, "learning_rate": 2.978672426887834e-07, "loss": 0.6302, "step": 4034 }, { "epoch": 0.6510164569215876, "grad_norm": 2.10612712467731, "learning_rate": 2.9762326403636373e-07, "loss": 0.5849, "step": 4035 }, { "epoch": 0.6511777992900936, "grad_norm": 1.3516459356584771, "learning_rate": 2.973793429975051e-07, "loss": 0.5339, "step": 4036 }, { "epoch": 0.6513391416585995, "grad_norm": 1.6196736561593712, "learning_rate": 2.971354796416481e-07, "loss": 0.5106, "step": 4037 }, { "epoch": 0.6515004840271055, "grad_norm": 1.5148975160135578, "learning_rate": 2.968916740382169e-07, "loss": 0.4685, "step": 4038 }, { "epoch": 0.6516618263956114, "grad_norm": 2.073356492300598, "learning_rate": 2.966479262566193e-07, "loss": 0.6241, "step": 4039 }, { "epoch": 0.6518231687641175, "grad_norm": 1.183035287574218, "learning_rate": 2.9640423636624656e-07, "loss": 0.4117, "step": 4040 }, { "epoch": 0.6519845111326235, "grad_norm": 1.6689790975634289, "learning_rate": 2.9616060443647316e-07, "loss": 0.3898, "step": 4041 }, { "epoch": 0.6521458535011294, "grad_norm": 1.2684115592670429, "learning_rate": 2.959170305366577e-07, "loss": 0.4498, "step": 4042 }, { "epoch": 0.6523071958696354, "grad_norm": 1.8522967077980468, "learning_rate": 2.95673514736142e-07, "loss": 0.5003, "step": 4043 }, { "epoch": 0.6524685382381413, "grad_norm": 1.1843759199556587, "learning_rate": 2.9543005710425116e-07, "loss": 0.2344, "step": 4044 }, { "epoch": 0.6526298806066473, "grad_norm": 1.6106301755455625, "learning_rate": 2.951866577102938e-07, "loss": 0.5216, "step": 4045 }, { "epoch": 0.6527912229751532, "grad_norm": 1.7139281867928517, "learning_rate": 2.9494331662356197e-07, "loss": 0.4478, "step": 4046 }, { "epoch": 0.6529525653436592, "grad_norm": 1.3317188780427716, "learning_rate": 2.947000339133314e-07, "loss": 0.538, "step": 4047 }, { "epoch": 0.6531139077121653, "grad_norm": 2.082421700973661, "learning_rate": 2.9445680964886077e-07, "loss": 0.4312, "step": 4048 }, { "epoch": 0.6532752500806712, "grad_norm": 1.43870725937768, "learning_rate": 2.942136438993923e-07, "loss": 0.4195, "step": 4049 }, { "epoch": 0.6534365924491772, "grad_norm": 1.375178758965578, "learning_rate": 2.939705367341515e-07, "loss": 0.4414, "step": 4050 }, { "epoch": 0.6535979348176831, "grad_norm": 1.1252070872123292, "learning_rate": 2.937274882223474e-07, "loss": 0.3474, "step": 4051 }, { "epoch": 0.6537592771861891, "grad_norm": 1.754106336025823, "learning_rate": 2.934844984331722e-07, "loss": 0.5608, "step": 4052 }, { "epoch": 0.653920619554695, "grad_norm": 1.9273998584783283, "learning_rate": 2.9324156743580133e-07, "loss": 0.566, "step": 4053 }, { "epoch": 0.654081961923201, "grad_norm": 1.3206116828301167, "learning_rate": 2.9299869529939324e-07, "loss": 0.4275, "step": 4054 }, { "epoch": 0.654243304291707, "grad_norm": 1.7878735852153573, "learning_rate": 2.927558820930903e-07, "loss": 0.4887, "step": 4055 }, { "epoch": 0.6544046466602129, "grad_norm": 1.7960568099435712, "learning_rate": 2.925131278860177e-07, "loss": 0.3838, "step": 4056 }, { "epoch": 0.654565989028719, "grad_norm": 1.6598012746643989, "learning_rate": 2.922704327472838e-07, "loss": 0.5139, "step": 4057 }, { "epoch": 0.6547273313972249, "grad_norm": 1.7967612852520003, "learning_rate": 2.9202779674598017e-07, "loss": 0.5024, "step": 4058 }, { "epoch": 0.6548886737657309, "grad_norm": 1.5484219916959574, "learning_rate": 2.917852199511815e-07, "loss": 0.372, "step": 4059 }, { "epoch": 0.6550500161342369, "grad_norm": 1.3694835456508676, "learning_rate": 2.915427024319456e-07, "loss": 0.2586, "step": 4060 }, { "epoch": 0.6552113585027428, "grad_norm": 1.8264085585675616, "learning_rate": 2.913002442573137e-07, "loss": 0.7209, "step": 4061 }, { "epoch": 0.6553727008712488, "grad_norm": 1.8333968743959395, "learning_rate": 2.910578454963101e-07, "loss": 0.5606, "step": 4062 }, { "epoch": 0.6555340432397547, "grad_norm": 1.7686768210002513, "learning_rate": 2.908155062179415e-07, "loss": 0.5597, "step": 4063 }, { "epoch": 0.6556953856082607, "grad_norm": 2.556694805366747, "learning_rate": 2.905732264911987e-07, "loss": 0.7515, "step": 4064 }, { "epoch": 0.6558567279767668, "grad_norm": 1.7278107838614392, "learning_rate": 2.9033100638505443e-07, "loss": 0.5166, "step": 4065 }, { "epoch": 0.6560180703452727, "grad_norm": 1.7339160758787946, "learning_rate": 2.9008884596846583e-07, "loss": 0.4775, "step": 4066 }, { "epoch": 0.6561794127137787, "grad_norm": 1.4319077225687933, "learning_rate": 2.8984674531037163e-07, "loss": 0.5175, "step": 4067 }, { "epoch": 0.6563407550822846, "grad_norm": 1.955511542418594, "learning_rate": 2.896047044796944e-07, "loss": 0.5589, "step": 4068 }, { "epoch": 0.6565020974507906, "grad_norm": 1.2145172367246961, "learning_rate": 2.8936272354533974e-07, "loss": 0.461, "step": 4069 }, { "epoch": 0.6566634398192965, "grad_norm": 1.6054223418199411, "learning_rate": 2.891208025761954e-07, "loss": 0.3704, "step": 4070 }, { "epoch": 0.6568247821878025, "grad_norm": 1.9091076303137846, "learning_rate": 2.888789416411329e-07, "loss": 0.5165, "step": 4071 }, { "epoch": 0.6569861245563084, "grad_norm": 2.2448086719503135, "learning_rate": 2.886371408090065e-07, "loss": 0.8052, "step": 4072 }, { "epoch": 0.6571474669248144, "grad_norm": 1.47337845717384, "learning_rate": 2.8839540014865265e-07, "loss": 0.6106, "step": 4073 }, { "epoch": 0.6573088092933205, "grad_norm": 1.4489736838205236, "learning_rate": 2.881537197288917e-07, "loss": 0.4134, "step": 4074 }, { "epoch": 0.6574701516618264, "grad_norm": 1.6070062185664467, "learning_rate": 2.8791209961852636e-07, "loss": 0.5986, "step": 4075 }, { "epoch": 0.6576314940303324, "grad_norm": 1.838329818090477, "learning_rate": 2.876705398863418e-07, "loss": 0.3949, "step": 4076 }, { "epoch": 0.6577928363988383, "grad_norm": 1.314082826146466, "learning_rate": 2.8742904060110666e-07, "loss": 0.4955, "step": 4077 }, { "epoch": 0.6579541787673443, "grad_norm": 1.5496250714333357, "learning_rate": 2.871876018315721e-07, "loss": 0.3911, "step": 4078 }, { "epoch": 0.6581155211358503, "grad_norm": 2.0211055087295646, "learning_rate": 2.8694622364647214e-07, "loss": 0.4826, "step": 4079 }, { "epoch": 0.6582768635043562, "grad_norm": 1.6822828955958058, "learning_rate": 2.8670490611452314e-07, "loss": 0.4132, "step": 4080 }, { "epoch": 0.6584382058728622, "grad_norm": 1.3111191706670113, "learning_rate": 2.864636493044247e-07, "loss": 0.3772, "step": 4081 }, { "epoch": 0.6585995482413682, "grad_norm": 1.677345239632402, "learning_rate": 2.8622245328485907e-07, "loss": 0.4232, "step": 4082 }, { "epoch": 0.6587608906098742, "grad_norm": 1.7676618187411246, "learning_rate": 2.859813181244908e-07, "loss": 0.5477, "step": 4083 }, { "epoch": 0.6589222329783802, "grad_norm": 1.6356674860507885, "learning_rate": 2.8574024389196735e-07, "loss": 0.4868, "step": 4084 }, { "epoch": 0.6590835753468861, "grad_norm": 3.8778880032307, "learning_rate": 2.854992306559194e-07, "loss": 0.5018, "step": 4085 }, { "epoch": 0.6592449177153921, "grad_norm": 1.4764509209350414, "learning_rate": 2.852582784849591e-07, "loss": 0.5675, "step": 4086 }, { "epoch": 0.659406260083898, "grad_norm": 1.5356466272094202, "learning_rate": 2.850173874476821e-07, "loss": 0.5654, "step": 4087 }, { "epoch": 0.659567602452404, "grad_norm": 2.5153982877752337, "learning_rate": 2.847765576126664e-07, "loss": 0.6713, "step": 4088 }, { "epoch": 0.6597289448209099, "grad_norm": 2.163155927020915, "learning_rate": 2.845357890484726e-07, "loss": 0.4675, "step": 4089 }, { "epoch": 0.6598902871894159, "grad_norm": 1.3441475125008577, "learning_rate": 2.842950818236439e-07, "loss": 0.4266, "step": 4090 }, { "epoch": 0.660051629557922, "grad_norm": 1.6320216257588562, "learning_rate": 2.8405443600670575e-07, "loss": 0.5374, "step": 4091 }, { "epoch": 0.6602129719264279, "grad_norm": 2.084311662480868, "learning_rate": 2.8381385166616663e-07, "loss": 0.791, "step": 4092 }, { "epoch": 0.6603743142949339, "grad_norm": 1.4889666563493262, "learning_rate": 2.8357332887051677e-07, "loss": 0.356, "step": 4093 }, { "epoch": 0.6605356566634398, "grad_norm": 1.8590049696020194, "learning_rate": 2.833328676882297e-07, "loss": 0.5801, "step": 4094 }, { "epoch": 0.6606969990319458, "grad_norm": 1.4294897891223988, "learning_rate": 2.8309246818776103e-07, "loss": 0.4401, "step": 4095 }, { "epoch": 0.6608583414004517, "grad_norm": 1.4357326257641831, "learning_rate": 2.8285213043754865e-07, "loss": 0.3766, "step": 4096 }, { "epoch": 0.6610196837689577, "grad_norm": 1.268934326687982, "learning_rate": 2.82611854506013e-07, "loss": 0.5167, "step": 4097 }, { "epoch": 0.6611810261374637, "grad_norm": 1.4395597876441772, "learning_rate": 2.8237164046155733e-07, "loss": 0.4784, "step": 4098 }, { "epoch": 0.6613423685059696, "grad_norm": 1.3356576369871838, "learning_rate": 2.821314883725664e-07, "loss": 0.5743, "step": 4099 }, { "epoch": 0.6615037108744757, "grad_norm": 1.308899755426501, "learning_rate": 2.8189139830740814e-07, "loss": 0.3783, "step": 4100 }, { "epoch": 0.6616650532429816, "grad_norm": 2.2290289886834933, "learning_rate": 2.816513703344324e-07, "loss": 0.7404, "step": 4101 }, { "epoch": 0.6618263956114876, "grad_norm": 1.3169982665621878, "learning_rate": 2.8141140452197165e-07, "loss": 0.5457, "step": 4102 }, { "epoch": 0.6619877379799936, "grad_norm": 1.7474804178868006, "learning_rate": 2.811715009383405e-07, "loss": 0.5355, "step": 4103 }, { "epoch": 0.6621490803484995, "grad_norm": 1.7756293108040868, "learning_rate": 2.809316596518356e-07, "loss": 0.5704, "step": 4104 }, { "epoch": 0.6623104227170055, "grad_norm": 1.6487348387789698, "learning_rate": 2.806918807307363e-07, "loss": 0.483, "step": 4105 }, { "epoch": 0.6624717650855114, "grad_norm": 1.8125828691151387, "learning_rate": 2.8045216424330377e-07, "loss": 0.558, "step": 4106 }, { "epoch": 0.6626331074540174, "grad_norm": 1.406742221730098, "learning_rate": 2.802125102577817e-07, "loss": 0.2795, "step": 4107 }, { "epoch": 0.6627944498225234, "grad_norm": 1.5062903592729386, "learning_rate": 2.7997291884239617e-07, "loss": 0.5018, "step": 4108 }, { "epoch": 0.6629557921910294, "grad_norm": 1.1304293590713772, "learning_rate": 2.797333900653548e-07, "loss": 0.4042, "step": 4109 }, { "epoch": 0.6631171345595354, "grad_norm": 1.5346300616241848, "learning_rate": 2.794939239948479e-07, "loss": 0.3508, "step": 4110 }, { "epoch": 0.6632784769280413, "grad_norm": 1.8259666286732406, "learning_rate": 2.792545206990479e-07, "loss": 0.4084, "step": 4111 }, { "epoch": 0.6634398192965473, "grad_norm": 1.2209662314110716, "learning_rate": 2.790151802461092e-07, "loss": 0.3956, "step": 4112 }, { "epoch": 0.6636011616650532, "grad_norm": 1.3837941882375737, "learning_rate": 2.787759027041686e-07, "loss": 0.5267, "step": 4113 }, { "epoch": 0.6637625040335592, "grad_norm": 1.4375023219877645, "learning_rate": 2.785366881413443e-07, "loss": 0.4649, "step": 4114 }, { "epoch": 0.6639238464020651, "grad_norm": 1.2626455581905345, "learning_rate": 2.7829753662573723e-07, "loss": 0.4544, "step": 4115 }, { "epoch": 0.6640851887705711, "grad_norm": 1.3820289072692924, "learning_rate": 2.7805844822543043e-07, "loss": 0.4857, "step": 4116 }, { "epoch": 0.6642465311390772, "grad_norm": 1.460261713896276, "learning_rate": 2.778194230084883e-07, "loss": 0.5286, "step": 4117 }, { "epoch": 0.6644078735075831, "grad_norm": 1.7252637716776538, "learning_rate": 2.7758046104295797e-07, "loss": 0.525, "step": 4118 }, { "epoch": 0.6645692158760891, "grad_norm": 1.559390751027156, "learning_rate": 2.77341562396868e-07, "loss": 0.5769, "step": 4119 }, { "epoch": 0.664730558244595, "grad_norm": 1.399053171692084, "learning_rate": 2.771027271382292e-07, "loss": 0.5145, "step": 4120 }, { "epoch": 0.664891900613101, "grad_norm": 1.2083543359093143, "learning_rate": 2.768639553350348e-07, "loss": 0.4851, "step": 4121 }, { "epoch": 0.665053242981607, "grad_norm": 1.8431300397915915, "learning_rate": 2.7662524705525876e-07, "loss": 0.4496, "step": 4122 }, { "epoch": 0.6652145853501129, "grad_norm": 4.254952630430453, "learning_rate": 2.763866023668581e-07, "loss": 0.6078, "step": 4123 }, { "epoch": 0.6653759277186189, "grad_norm": 1.073254350032739, "learning_rate": 2.7614802133777113e-07, "loss": 0.4403, "step": 4124 }, { "epoch": 0.6655372700871249, "grad_norm": 1.6616145651456742, "learning_rate": 2.759095040359183e-07, "loss": 0.4133, "step": 4125 }, { "epoch": 0.6656986124556309, "grad_norm": 1.838412690073874, "learning_rate": 2.7567105052920213e-07, "loss": 0.3796, "step": 4126 }, { "epoch": 0.6658599548241368, "grad_norm": 1.7141539418006762, "learning_rate": 2.7543266088550623e-07, "loss": 0.4122, "step": 4127 }, { "epoch": 0.6660212971926428, "grad_norm": 1.5850086216737211, "learning_rate": 2.7519433517269664e-07, "loss": 0.4765, "step": 4128 }, { "epoch": 0.6661826395611488, "grad_norm": 1.7317644009195818, "learning_rate": 2.7495607345862124e-07, "loss": 0.5536, "step": 4129 }, { "epoch": 0.6663439819296547, "grad_norm": 1.4948652118662569, "learning_rate": 2.7471787581110915e-07, "loss": 0.4591, "step": 4130 }, { "epoch": 0.6665053242981607, "grad_norm": 1.6699547836898103, "learning_rate": 2.74479742297972e-07, "loss": 0.6339, "step": 4131 }, { "epoch": 0.6666666666666666, "grad_norm": 1.8834453484748446, "learning_rate": 2.742416729870023e-07, "loss": 0.7246, "step": 4132 }, { "epoch": 0.6668280090351726, "grad_norm": 1.5499989724924927, "learning_rate": 2.74003667945975e-07, "loss": 0.4694, "step": 4133 }, { "epoch": 0.6669893514036787, "grad_norm": 1.4668520575453325, "learning_rate": 2.737657272426464e-07, "loss": 0.444, "step": 4134 }, { "epoch": 0.6671506937721846, "grad_norm": 1.979238214370737, "learning_rate": 2.7352785094475474e-07, "loss": 0.6332, "step": 4135 }, { "epoch": 0.6673120361406906, "grad_norm": 1.7667942058063175, "learning_rate": 2.732900391200198e-07, "loss": 0.5152, "step": 4136 }, { "epoch": 0.6674733785091965, "grad_norm": 1.781766047095418, "learning_rate": 2.7305229183614267e-07, "loss": 0.5779, "step": 4137 }, { "epoch": 0.6676347208777025, "grad_norm": 1.3138626835749974, "learning_rate": 2.7281460916080646e-07, "loss": 0.3415, "step": 4138 }, { "epoch": 0.6677960632462084, "grad_norm": 1.4148075396129, "learning_rate": 2.725769911616761e-07, "loss": 0.3877, "step": 4139 }, { "epoch": 0.6679574056147144, "grad_norm": 1.7286848599621205, "learning_rate": 2.7233943790639745e-07, "loss": 0.4767, "step": 4140 }, { "epoch": 0.6681187479832204, "grad_norm": 1.2338682172540727, "learning_rate": 2.7210194946259835e-07, "loss": 0.4941, "step": 4141 }, { "epoch": 0.6682800903517263, "grad_norm": 1.7980928357548844, "learning_rate": 2.7186452589788833e-07, "loss": 0.5318, "step": 4142 }, { "epoch": 0.6684414327202324, "grad_norm": 1.6870913540583619, "learning_rate": 2.716271672798579e-07, "loss": 0.4568, "step": 4143 }, { "epoch": 0.6686027750887383, "grad_norm": 1.4666128639987717, "learning_rate": 2.713898736760799e-07, "loss": 0.563, "step": 4144 }, { "epoch": 0.6687641174572443, "grad_norm": 1.547555715885978, "learning_rate": 2.7115264515410764e-07, "loss": 0.4725, "step": 4145 }, { "epoch": 0.6689254598257502, "grad_norm": 1.7189559119597433, "learning_rate": 2.709154817814768e-07, "loss": 0.4547, "step": 4146 }, { "epoch": 0.6690868021942562, "grad_norm": 1.2946679512185175, "learning_rate": 2.70678383625704e-07, "loss": 0.4108, "step": 4147 }, { "epoch": 0.6692481445627622, "grad_norm": 1.496652125448658, "learning_rate": 2.704413507542876e-07, "loss": 0.5248, "step": 4148 }, { "epoch": 0.6694094869312681, "grad_norm": 1.6574095499666945, "learning_rate": 2.702043832347074e-07, "loss": 0.5901, "step": 4149 }, { "epoch": 0.6695708292997741, "grad_norm": 2.025920743365455, "learning_rate": 2.699674811344239e-07, "loss": 0.5983, "step": 4150 }, { "epoch": 0.6697321716682801, "grad_norm": 1.4191571764685604, "learning_rate": 2.6973064452087997e-07, "loss": 0.3562, "step": 4151 }, { "epoch": 0.6698935140367861, "grad_norm": 1.451829722663653, "learning_rate": 2.694938734614993e-07, "loss": 0.4445, "step": 4152 }, { "epoch": 0.670054856405292, "grad_norm": 1.5610115591204057, "learning_rate": 2.6925716802368676e-07, "loss": 0.3531, "step": 4153 }, { "epoch": 0.670216198773798, "grad_norm": 2.022076476728929, "learning_rate": 2.690205282748289e-07, "loss": 0.4848, "step": 4154 }, { "epoch": 0.670377541142304, "grad_norm": 1.7754146628856058, "learning_rate": 2.6878395428229363e-07, "loss": 0.4307, "step": 4155 }, { "epoch": 0.6705388835108099, "grad_norm": 1.9688269584994103, "learning_rate": 2.685474461134295e-07, "loss": 0.4684, "step": 4156 }, { "epoch": 0.6707002258793159, "grad_norm": 1.7857288468993375, "learning_rate": 2.683110038355672e-07, "loss": 0.5231, "step": 4157 }, { "epoch": 0.6708615682478218, "grad_norm": 1.6385609313000495, "learning_rate": 2.680746275160179e-07, "loss": 0.4728, "step": 4158 }, { "epoch": 0.6710229106163278, "grad_norm": 1.6383918459285172, "learning_rate": 2.678383172220747e-07, "loss": 0.3999, "step": 4159 }, { "epoch": 0.6711842529848339, "grad_norm": 2.245928152862924, "learning_rate": 2.6760207302101117e-07, "loss": 0.5103, "step": 4160 }, { "epoch": 0.6713455953533398, "grad_norm": 1.7050491103317782, "learning_rate": 2.673658949800825e-07, "loss": 0.465, "step": 4161 }, { "epoch": 0.6715069377218458, "grad_norm": 1.9956288253503796, "learning_rate": 2.6712978316652534e-07, "loss": 0.5634, "step": 4162 }, { "epoch": 0.6716682800903517, "grad_norm": 1.8110189635128073, "learning_rate": 2.668937376475565e-07, "loss": 0.6124, "step": 4163 }, { "epoch": 0.6718296224588577, "grad_norm": 1.6064121416835606, "learning_rate": 2.6665775849037485e-07, "loss": 0.3761, "step": 4164 }, { "epoch": 0.6719909648273636, "grad_norm": 1.2226262683056457, "learning_rate": 2.664218457621602e-07, "loss": 0.5106, "step": 4165 }, { "epoch": 0.6721523071958696, "grad_norm": 2.161113506218837, "learning_rate": 2.661859995300729e-07, "loss": 0.5814, "step": 4166 }, { "epoch": 0.6723136495643756, "grad_norm": 1.5661616336527435, "learning_rate": 2.659502198612551e-07, "loss": 0.5613, "step": 4167 }, { "epoch": 0.6724749919328816, "grad_norm": 1.4722786578671623, "learning_rate": 2.6571450682282957e-07, "loss": 0.6517, "step": 4168 }, { "epoch": 0.6726363343013876, "grad_norm": 1.693390209088867, "learning_rate": 2.654788604818999e-07, "loss": 0.5065, "step": 4169 }, { "epoch": 0.6727976766698935, "grad_norm": 1.701124293541129, "learning_rate": 2.652432809055517e-07, "loss": 0.5947, "step": 4170 }, { "epoch": 0.6729590190383995, "grad_norm": 1.8854810868809042, "learning_rate": 2.650077681608502e-07, "loss": 0.4688, "step": 4171 }, { "epoch": 0.6731203614069055, "grad_norm": 1.5822567625969604, "learning_rate": 2.647723223148427e-07, "loss": 0.4617, "step": 4172 }, { "epoch": 0.6732817037754114, "grad_norm": 1.5874612187732362, "learning_rate": 2.6453694343455674e-07, "loss": 0.4771, "step": 4173 }, { "epoch": 0.6734430461439174, "grad_norm": 1.6237574742051941, "learning_rate": 2.6430163158700113e-07, "loss": 0.394, "step": 4174 }, { "epoch": 0.6736043885124233, "grad_norm": 2.229358510351303, "learning_rate": 2.640663868391659e-07, "loss": 0.5003, "step": 4175 }, { "epoch": 0.6737657308809293, "grad_norm": 2.302840705209137, "learning_rate": 2.638312092580211e-07, "loss": 0.5377, "step": 4176 }, { "epoch": 0.6739270732494353, "grad_norm": 1.153062189143795, "learning_rate": 2.635960989105185e-07, "loss": 0.3845, "step": 4177 }, { "epoch": 0.6740884156179413, "grad_norm": 1.5227160970984286, "learning_rate": 2.6336105586359055e-07, "loss": 0.2915, "step": 4178 }, { "epoch": 0.6742497579864473, "grad_norm": 1.3410762978709005, "learning_rate": 2.6312608018415007e-07, "loss": 0.4309, "step": 4179 }, { "epoch": 0.6744111003549532, "grad_norm": 1.5760884505154737, "learning_rate": 2.628911719390911e-07, "loss": 0.4887, "step": 4180 }, { "epoch": 0.6745724427234592, "grad_norm": 1.4365436026106337, "learning_rate": 2.626563311952886e-07, "loss": 0.4112, "step": 4181 }, { "epoch": 0.6747337850919651, "grad_norm": 1.4857342570005334, "learning_rate": 2.624215580195981e-07, "loss": 0.4334, "step": 4182 }, { "epoch": 0.6748951274604711, "grad_norm": 1.412599976976468, "learning_rate": 2.6218685247885595e-07, "loss": 0.4825, "step": 4183 }, { "epoch": 0.675056469828977, "grad_norm": 1.4950598588014916, "learning_rate": 2.6195221463987915e-07, "loss": 0.5054, "step": 4184 }, { "epoch": 0.6752178121974831, "grad_norm": 1.3880680450750547, "learning_rate": 2.6171764456946567e-07, "loss": 0.4508, "step": 4185 }, { "epoch": 0.6753791545659891, "grad_norm": 1.591462307271812, "learning_rate": 2.614831423343936e-07, "loss": 0.3599, "step": 4186 }, { "epoch": 0.675540496934495, "grad_norm": 1.5190882490589397, "learning_rate": 2.612487080014225e-07, "loss": 0.446, "step": 4187 }, { "epoch": 0.675701839303001, "grad_norm": 1.4947838688911737, "learning_rate": 2.610143416372923e-07, "loss": 0.4196, "step": 4188 }, { "epoch": 0.6758631816715069, "grad_norm": 1.6170235518496099, "learning_rate": 2.6078004330872327e-07, "loss": 0.5003, "step": 4189 }, { "epoch": 0.6760245240400129, "grad_norm": 1.295434358430389, "learning_rate": 2.6054581308241656e-07, "loss": 0.4741, "step": 4190 }, { "epoch": 0.6761858664085189, "grad_norm": 2.544006136059109, "learning_rate": 2.6031165102505427e-07, "loss": 0.5513, "step": 4191 }, { "epoch": 0.6763472087770248, "grad_norm": 1.590200098390304, "learning_rate": 2.6007755720329814e-07, "loss": 0.5371, "step": 4192 }, { "epoch": 0.6765085511455308, "grad_norm": 1.7464311858259864, "learning_rate": 2.5984353168379194e-07, "loss": 0.5404, "step": 4193 }, { "epoch": 0.6766698935140368, "grad_norm": 1.3178544949775586, "learning_rate": 2.5960957453315835e-07, "loss": 0.3445, "step": 4194 }, { "epoch": 0.6768312358825428, "grad_norm": 1.3108868449339734, "learning_rate": 2.5937568581800185e-07, "loss": 0.4759, "step": 4195 }, { "epoch": 0.6769925782510487, "grad_norm": 1.303362053183173, "learning_rate": 2.59141865604907e-07, "loss": 0.4012, "step": 4196 }, { "epoch": 0.6771539206195547, "grad_norm": 1.8804540147826099, "learning_rate": 2.5890811396043866e-07, "loss": 0.6085, "step": 4197 }, { "epoch": 0.6773152629880607, "grad_norm": 1.7357247401622904, "learning_rate": 2.5867443095114246e-07, "loss": 0.4478, "step": 4198 }, { "epoch": 0.6774766053565666, "grad_norm": 1.2866470826508272, "learning_rate": 2.584408166435441e-07, "loss": 0.4579, "step": 4199 }, { "epoch": 0.6776379477250726, "grad_norm": 2.191094850660803, "learning_rate": 2.582072711041503e-07, "loss": 0.5769, "step": 4200 }, { "epoch": 0.6777992900935785, "grad_norm": 1.16104241124313, "learning_rate": 2.57973794399448e-07, "loss": 0.3838, "step": 4201 }, { "epoch": 0.6779606324620845, "grad_norm": 1.4566732585562876, "learning_rate": 2.5774038659590404e-07, "loss": 0.5947, "step": 4202 }, { "epoch": 0.6781219748305906, "grad_norm": 1.7820329785704434, "learning_rate": 2.5750704775996623e-07, "loss": 0.5001, "step": 4203 }, { "epoch": 0.6782833171990965, "grad_norm": 1.7147816014595987, "learning_rate": 2.5727377795806273e-07, "loss": 0.5402, "step": 4204 }, { "epoch": 0.6784446595676025, "grad_norm": 1.546718782185424, "learning_rate": 2.5704057725660165e-07, "loss": 0.6314, "step": 4205 }, { "epoch": 0.6786060019361084, "grad_norm": 1.769801171875819, "learning_rate": 2.56807445721972e-07, "loss": 0.5519, "step": 4206 }, { "epoch": 0.6787673443046144, "grad_norm": 1.2945461735069521, "learning_rate": 2.565743834205424e-07, "loss": 0.4636, "step": 4207 }, { "epoch": 0.6789286866731203, "grad_norm": 1.4421539434990558, "learning_rate": 2.563413904186622e-07, "loss": 0.6234, "step": 4208 }, { "epoch": 0.6790900290416263, "grad_norm": 2.0516620597302144, "learning_rate": 2.561084667826612e-07, "loss": 0.8129, "step": 4209 }, { "epoch": 0.6792513714101323, "grad_norm": 1.4598061163729594, "learning_rate": 2.5587561257884887e-07, "loss": 0.3744, "step": 4210 }, { "epoch": 0.6794127137786383, "grad_norm": 1.7454313177988983, "learning_rate": 2.5564282787351555e-07, "loss": 0.4047, "step": 4211 }, { "epoch": 0.6795740561471443, "grad_norm": 1.3122369866012051, "learning_rate": 2.554101127329311e-07, "loss": 0.3985, "step": 4212 }, { "epoch": 0.6797353985156502, "grad_norm": 1.599472095900765, "learning_rate": 2.5517746722334613e-07, "loss": 0.4695, "step": 4213 }, { "epoch": 0.6798967408841562, "grad_norm": 1.5323477722034273, "learning_rate": 2.549448914109915e-07, "loss": 0.4096, "step": 4214 }, { "epoch": 0.6800580832526621, "grad_norm": 2.1033230465830384, "learning_rate": 2.547123853620775e-07, "loss": 0.5374, "step": 4215 }, { "epoch": 0.6802194256211681, "grad_norm": 1.647099752127989, "learning_rate": 2.544799491427956e-07, "loss": 0.4454, "step": 4216 }, { "epoch": 0.6803807679896741, "grad_norm": 1.7643200938822745, "learning_rate": 2.542475828193164e-07, "loss": 0.4777, "step": 4217 }, { "epoch": 0.68054211035818, "grad_norm": 1.2148580764956092, "learning_rate": 2.5401528645779123e-07, "loss": 0.4604, "step": 4218 }, { "epoch": 0.680703452726686, "grad_norm": 1.5970304051456619, "learning_rate": 2.5378306012435145e-07, "loss": 0.3442, "step": 4219 }, { "epoch": 0.680864795095192, "grad_norm": 1.6841281830349792, "learning_rate": 2.53550903885108e-07, "loss": 0.5126, "step": 4220 }, { "epoch": 0.681026137463698, "grad_norm": 1.8676926954912203, "learning_rate": 2.533188178061524e-07, "loss": 0.5617, "step": 4221 }, { "epoch": 0.681187479832204, "grad_norm": 1.736629086683969, "learning_rate": 2.530868019535561e-07, "loss": 0.385, "step": 4222 }, { "epoch": 0.6813488222007099, "grad_norm": 1.4762417076452892, "learning_rate": 2.5285485639337023e-07, "loss": 0.4099, "step": 4223 }, { "epoch": 0.6815101645692159, "grad_norm": 1.540400663992145, "learning_rate": 2.526229811916265e-07, "loss": 0.5076, "step": 4224 }, { "epoch": 0.6816715069377218, "grad_norm": 1.3568379195816458, "learning_rate": 2.523911764143358e-07, "loss": 0.396, "step": 4225 }, { "epoch": 0.6818328493062278, "grad_norm": 1.69013700370061, "learning_rate": 2.521594421274894e-07, "loss": 0.3676, "step": 4226 }, { "epoch": 0.6819941916747337, "grad_norm": 1.6596464922806904, "learning_rate": 2.51927778397059e-07, "loss": 0.3878, "step": 4227 }, { "epoch": 0.6821555340432398, "grad_norm": 1.5379013871176568, "learning_rate": 2.516961852889954e-07, "loss": 0.4581, "step": 4228 }, { "epoch": 0.6823168764117458, "grad_norm": 1.567233354804726, "learning_rate": 2.5146466286922974e-07, "loss": 0.3488, "step": 4229 }, { "epoch": 0.6824782187802517, "grad_norm": 3.8380603393614203, "learning_rate": 2.512332112036727e-07, "loss": 0.4471, "step": 4230 }, { "epoch": 0.6826395611487577, "grad_norm": 1.6308454136050976, "learning_rate": 2.510018303582151e-07, "loss": 0.5306, "step": 4231 }, { "epoch": 0.6828009035172636, "grad_norm": 1.1523804739579908, "learning_rate": 2.5077052039872784e-07, "loss": 0.4566, "step": 4232 }, { "epoch": 0.6829622458857696, "grad_norm": 1.8228799289452837, "learning_rate": 2.505392813910608e-07, "loss": 0.4115, "step": 4233 }, { "epoch": 0.6831235882542755, "grad_norm": 1.877437786996674, "learning_rate": 2.5030811340104454e-07, "loss": 0.5695, "step": 4234 }, { "epoch": 0.6832849306227815, "grad_norm": 1.2368844032797572, "learning_rate": 2.500770164944891e-07, "loss": 0.4226, "step": 4235 }, { "epoch": 0.6834462729912875, "grad_norm": 1.2025296608484703, "learning_rate": 2.498459907371839e-07, "loss": 0.4713, "step": 4236 }, { "epoch": 0.6836076153597935, "grad_norm": 1.8735142861507, "learning_rate": 2.4961503619489876e-07, "loss": 0.4741, "step": 4237 }, { "epoch": 0.6837689577282995, "grad_norm": 1.313711515492694, "learning_rate": 2.493841529333825e-07, "loss": 0.4141, "step": 4238 }, { "epoch": 0.6839303000968054, "grad_norm": 1.474785032609932, "learning_rate": 2.491533410183645e-07, "loss": 0.3649, "step": 4239 }, { "epoch": 0.6840916424653114, "grad_norm": 1.3924164965810955, "learning_rate": 2.489226005155534e-07, "loss": 0.327, "step": 4240 }, { "epoch": 0.6842529848338174, "grad_norm": 1.3984275263425328, "learning_rate": 2.4869193149063717e-07, "loss": 0.4737, "step": 4241 }, { "epoch": 0.6844143272023233, "grad_norm": 1.6091139498345808, "learning_rate": 2.48461334009284e-07, "loss": 0.5106, "step": 4242 }, { "epoch": 0.6845756695708293, "grad_norm": 1.624408540926162, "learning_rate": 2.4823080813714126e-07, "loss": 0.4559, "step": 4243 }, { "epoch": 0.6847370119393352, "grad_norm": 1.4663721681189805, "learning_rate": 2.480003539398362e-07, "loss": 0.4171, "step": 4244 }, { "epoch": 0.6848983543078412, "grad_norm": 1.9404157418228423, "learning_rate": 2.477699714829758e-07, "loss": 0.5684, "step": 4245 }, { "epoch": 0.6850596966763473, "grad_norm": 1.7152378611310934, "learning_rate": 2.475396608321461e-07, "loss": 0.4625, "step": 4246 }, { "epoch": 0.6852210390448532, "grad_norm": 1.4935902498126208, "learning_rate": 2.4730942205291317e-07, "loss": 0.4175, "step": 4247 }, { "epoch": 0.6853823814133592, "grad_norm": 1.3596817635108542, "learning_rate": 2.470792552108227e-07, "loss": 0.4052, "step": 4248 }, { "epoch": 0.6855437237818651, "grad_norm": 1.4884057180666455, "learning_rate": 2.4684916037139905e-07, "loss": 0.3419, "step": 4249 }, { "epoch": 0.6857050661503711, "grad_norm": 1.3706910935942251, "learning_rate": 2.4661913760014753e-07, "loss": 0.4383, "step": 4250 }, { "epoch": 0.685866408518877, "grad_norm": 1.5550344262191382, "learning_rate": 2.463891869625514e-07, "loss": 0.5492, "step": 4251 }, { "epoch": 0.686027750887383, "grad_norm": 1.753548294616237, "learning_rate": 2.4615930852407464e-07, "loss": 0.4743, "step": 4252 }, { "epoch": 0.686189093255889, "grad_norm": 1.9074556259053508, "learning_rate": 2.459295023501596e-07, "loss": 0.5238, "step": 4253 }, { "epoch": 0.686350435624395, "grad_norm": 1.8154501090458113, "learning_rate": 2.4569976850622886e-07, "loss": 0.5735, "step": 4254 }, { "epoch": 0.686511777992901, "grad_norm": 1.702376594944375, "learning_rate": 2.4547010705768427e-07, "loss": 0.4161, "step": 4255 }, { "epoch": 0.6866731203614069, "grad_norm": 1.6492967580817202, "learning_rate": 2.452405180699066e-07, "loss": 0.5822, "step": 4256 }, { "epoch": 0.6868344627299129, "grad_norm": 1.5486218386032864, "learning_rate": 2.450110016082565e-07, "loss": 0.4713, "step": 4257 }, { "epoch": 0.6869958050984188, "grad_norm": 1.3258304296845762, "learning_rate": 2.4478155773807395e-07, "loss": 0.4019, "step": 4258 }, { "epoch": 0.6871571474669248, "grad_norm": 1.275664646218518, "learning_rate": 2.4455218652467777e-07, "loss": 0.4638, "step": 4259 }, { "epoch": 0.6873184898354308, "grad_norm": 1.434674180687108, "learning_rate": 2.443228880333666e-07, "loss": 0.5156, "step": 4260 }, { "epoch": 0.6874798322039367, "grad_norm": 1.5642613973301414, "learning_rate": 2.4409366232941827e-07, "loss": 0.6776, "step": 4261 }, { "epoch": 0.6876411745724427, "grad_norm": 1.413399388071254, "learning_rate": 2.438645094780897e-07, "loss": 0.536, "step": 4262 }, { "epoch": 0.6878025169409487, "grad_norm": 1.4617866863242042, "learning_rate": 2.4363542954461753e-07, "loss": 0.3596, "step": 4263 }, { "epoch": 0.6879638593094547, "grad_norm": 1.568725771165961, "learning_rate": 2.434064225942169e-07, "loss": 0.4519, "step": 4264 }, { "epoch": 0.6881252016779607, "grad_norm": 1.9649312358210216, "learning_rate": 2.4317748869208286e-07, "loss": 0.3947, "step": 4265 }, { "epoch": 0.6882865440464666, "grad_norm": 1.5166280514478818, "learning_rate": 2.4294862790338916e-07, "loss": 0.4756, "step": 4266 }, { "epoch": 0.6884478864149726, "grad_norm": 0.9634973807940196, "learning_rate": 2.427198402932891e-07, "loss": 0.3348, "step": 4267 }, { "epoch": 0.6886092287834785, "grad_norm": 1.492715712123326, "learning_rate": 2.424911259269152e-07, "loss": 0.4585, "step": 4268 }, { "epoch": 0.6887705711519845, "grad_norm": 1.563160489673546, "learning_rate": 2.422624848693785e-07, "loss": 0.4586, "step": 4269 }, { "epoch": 0.6889319135204904, "grad_norm": 1.7093290668013752, "learning_rate": 2.420339171857699e-07, "loss": 0.4027, "step": 4270 }, { "epoch": 0.6890932558889965, "grad_norm": 1.3668821157262379, "learning_rate": 2.418054229411593e-07, "loss": 0.4579, "step": 4271 }, { "epoch": 0.6892545982575025, "grad_norm": 1.4437203375341234, "learning_rate": 2.4157700220059487e-07, "loss": 0.4262, "step": 4272 }, { "epoch": 0.6894159406260084, "grad_norm": 2.528067672324709, "learning_rate": 2.413486550291053e-07, "loss": 0.6075, "step": 4273 }, { "epoch": 0.6895772829945144, "grad_norm": 1.3710180625782997, "learning_rate": 2.41120381491697e-07, "loss": 0.4736, "step": 4274 }, { "epoch": 0.6897386253630203, "grad_norm": 1.1949066270876088, "learning_rate": 2.408921816533561e-07, "loss": 0.4786, "step": 4275 }, { "epoch": 0.6898999677315263, "grad_norm": 1.3782163196159378, "learning_rate": 2.4066405557904785e-07, "loss": 0.461, "step": 4276 }, { "epoch": 0.6900613101000322, "grad_norm": 1.4393584635650538, "learning_rate": 2.404360033337158e-07, "loss": 0.4759, "step": 4277 }, { "epoch": 0.6902226524685382, "grad_norm": 1.9491084799603746, "learning_rate": 2.4020802498228334e-07, "loss": 0.5608, "step": 4278 }, { "epoch": 0.6903839948370442, "grad_norm": 2.127699875906462, "learning_rate": 2.3998012058965205e-07, "loss": 0.8644, "step": 4279 }, { "epoch": 0.6905453372055502, "grad_norm": 1.6757473886621188, "learning_rate": 2.39752290220703e-07, "loss": 0.6253, "step": 4280 }, { "epoch": 0.6907066795740562, "grad_norm": 2.0577328237114014, "learning_rate": 2.395245339402963e-07, "loss": 0.6656, "step": 4281 }, { "epoch": 0.6908680219425621, "grad_norm": 1.9752304353159067, "learning_rate": 2.392968518132702e-07, "loss": 0.5183, "step": 4282 }, { "epoch": 0.6910293643110681, "grad_norm": 1.9305984262086766, "learning_rate": 2.3906924390444245e-07, "loss": 0.7043, "step": 4283 }, { "epoch": 0.691190706679574, "grad_norm": 1.6381528848649274, "learning_rate": 2.3884171027860967e-07, "loss": 0.5073, "step": 4284 }, { "epoch": 0.69135204904808, "grad_norm": 1.402096272258185, "learning_rate": 2.386142510005471e-07, "loss": 0.3735, "step": 4285 }, { "epoch": 0.691513391416586, "grad_norm": 1.2144193736012396, "learning_rate": 2.3838686613500924e-07, "loss": 0.3606, "step": 4286 }, { "epoch": 0.6916747337850919, "grad_norm": 1.2481680797769767, "learning_rate": 2.3815955574672853e-07, "loss": 0.4704, "step": 4287 }, { "epoch": 0.691836076153598, "grad_norm": 1.5790911445910985, "learning_rate": 2.3793231990041707e-07, "loss": 0.371, "step": 4288 }, { "epoch": 0.691997418522104, "grad_norm": 1.6362842356318237, "learning_rate": 2.3770515866076563e-07, "loss": 0.5053, "step": 4289 }, { "epoch": 0.6921587608906099, "grad_norm": 1.5681680488987115, "learning_rate": 2.3747807209244309e-07, "loss": 0.5812, "step": 4290 }, { "epoch": 0.6923201032591159, "grad_norm": 1.470586318687985, "learning_rate": 2.372510602600979e-07, "loss": 0.4676, "step": 4291 }, { "epoch": 0.6924814456276218, "grad_norm": 1.4929718831986107, "learning_rate": 2.3702412322835652e-07, "loss": 0.5195, "step": 4292 }, { "epoch": 0.6926427879961278, "grad_norm": 1.473435254059877, "learning_rate": 2.3679726106182464e-07, "loss": 0.4193, "step": 4293 }, { "epoch": 0.6928041303646337, "grad_norm": 2.4818104877219276, "learning_rate": 2.365704738250866e-07, "loss": 0.4499, "step": 4294 }, { "epoch": 0.6929654727331397, "grad_norm": 1.1606263001598498, "learning_rate": 2.3634376158270474e-07, "loss": 0.5208, "step": 4295 }, { "epoch": 0.6931268151016456, "grad_norm": 1.174190877760024, "learning_rate": 2.3611712439922126e-07, "loss": 0.3601, "step": 4296 }, { "epoch": 0.6932881574701517, "grad_norm": 1.9522651916061335, "learning_rate": 2.3589056233915583e-07, "loss": 0.5984, "step": 4297 }, { "epoch": 0.6934494998386577, "grad_norm": 1.5590785904920144, "learning_rate": 2.356640754670073e-07, "loss": 0.5198, "step": 4298 }, { "epoch": 0.6936108422071636, "grad_norm": 2.0541383422797095, "learning_rate": 2.3543766384725327e-07, "loss": 0.7091, "step": 4299 }, { "epoch": 0.6937721845756696, "grad_norm": 1.4272974600799868, "learning_rate": 2.3521132754434924e-07, "loss": 0.286, "step": 4300 }, { "epoch": 0.6939335269441755, "grad_norm": 1.4380382691067464, "learning_rate": 2.3498506662272987e-07, "loss": 0.5671, "step": 4301 }, { "epoch": 0.6940948693126815, "grad_norm": 1.6561773032302753, "learning_rate": 2.3475888114680842e-07, "loss": 0.4265, "step": 4302 }, { "epoch": 0.6942562116811875, "grad_norm": 2.5860406676396797, "learning_rate": 2.3453277118097602e-07, "loss": 0.3917, "step": 4303 }, { "epoch": 0.6944175540496934, "grad_norm": 1.6237798658389189, "learning_rate": 2.3430673678960316e-07, "loss": 0.4145, "step": 4304 }, { "epoch": 0.6945788964181994, "grad_norm": 1.9154694038164084, "learning_rate": 2.3408077803703795e-07, "loss": 0.476, "step": 4305 }, { "epoch": 0.6947402387867054, "grad_norm": 1.8307641696613388, "learning_rate": 2.3385489498760758e-07, "loss": 0.5267, "step": 4306 }, { "epoch": 0.6949015811552114, "grad_norm": 1.8102715045192703, "learning_rate": 2.3362908770561747e-07, "loss": 0.6225, "step": 4307 }, { "epoch": 0.6950629235237173, "grad_norm": 1.7230017352369429, "learning_rate": 2.334033562553515e-07, "loss": 0.4047, "step": 4308 }, { "epoch": 0.6952242658922233, "grad_norm": 1.7261133774951671, "learning_rate": 2.3317770070107223e-07, "loss": 0.5144, "step": 4309 }, { "epoch": 0.6953856082607293, "grad_norm": 1.4188168514146835, "learning_rate": 2.3295212110701988e-07, "loss": 0.4305, "step": 4310 }, { "epoch": 0.6955469506292352, "grad_norm": 1.8435905516908941, "learning_rate": 2.327266175374138e-07, "loss": 0.5037, "step": 4311 }, { "epoch": 0.6957082929977412, "grad_norm": 1.725481134847106, "learning_rate": 2.3250119005645146e-07, "loss": 0.6185, "step": 4312 }, { "epoch": 0.6958696353662471, "grad_norm": 1.6821196225025017, "learning_rate": 2.3227583872830837e-07, "loss": 0.617, "step": 4313 }, { "epoch": 0.6960309777347532, "grad_norm": 1.3551758991243328, "learning_rate": 2.320505636171387e-07, "loss": 0.4828, "step": 4314 }, { "epoch": 0.6961923201032592, "grad_norm": 1.4694231803780102, "learning_rate": 2.3182536478707503e-07, "loss": 0.5833, "step": 4315 }, { "epoch": 0.6963536624717651, "grad_norm": 1.236072869489147, "learning_rate": 2.316002423022277e-07, "loss": 0.4263, "step": 4316 }, { "epoch": 0.6965150048402711, "grad_norm": 1.6321373232586396, "learning_rate": 2.3137519622668594e-07, "loss": 0.5722, "step": 4317 }, { "epoch": 0.696676347208777, "grad_norm": 1.275448480903229, "learning_rate": 2.311502266245164e-07, "loss": 0.4815, "step": 4318 }, { "epoch": 0.696837689577283, "grad_norm": 1.8009834093328396, "learning_rate": 2.3092533355976506e-07, "loss": 0.5446, "step": 4319 }, { "epoch": 0.6969990319457889, "grad_norm": 1.7087891908628796, "learning_rate": 2.3070051709645555e-07, "loss": 0.6599, "step": 4320 }, { "epoch": 0.6971603743142949, "grad_norm": 1.4590760474149018, "learning_rate": 2.3047577729858924e-07, "loss": 0.3557, "step": 4321 }, { "epoch": 0.6973217166828009, "grad_norm": 1.2195900198480887, "learning_rate": 2.3025111423014653e-07, "loss": 0.4172, "step": 4322 }, { "epoch": 0.6974830590513069, "grad_norm": 1.8755651893727374, "learning_rate": 2.300265279550852e-07, "loss": 0.5959, "step": 4323 }, { "epoch": 0.6976444014198129, "grad_norm": 1.4295761815493253, "learning_rate": 2.298020185373417e-07, "loss": 0.6152, "step": 4324 }, { "epoch": 0.6978057437883188, "grad_norm": 1.8548581052116522, "learning_rate": 2.2957758604083056e-07, "loss": 0.3979, "step": 4325 }, { "epoch": 0.6979670861568248, "grad_norm": 1.3584727219674493, "learning_rate": 2.29353230529444e-07, "loss": 0.4963, "step": 4326 }, { "epoch": 0.6981284285253307, "grad_norm": 1.4540849201794865, "learning_rate": 2.2912895206705274e-07, "loss": 0.4831, "step": 4327 }, { "epoch": 0.6982897708938367, "grad_norm": 1.2055952301225932, "learning_rate": 2.2890475071750559e-07, "loss": 0.4167, "step": 4328 }, { "epoch": 0.6984511132623427, "grad_norm": 1.9522110288278955, "learning_rate": 2.2868062654462887e-07, "loss": 0.4448, "step": 4329 }, { "epoch": 0.6986124556308486, "grad_norm": 1.8107420189636962, "learning_rate": 2.2845657961222758e-07, "loss": 0.6631, "step": 4330 }, { "epoch": 0.6987737979993547, "grad_norm": 1.772517364696675, "learning_rate": 2.2823260998408432e-07, "loss": 0.5615, "step": 4331 }, { "epoch": 0.6989351403678606, "grad_norm": 2.033761687054667, "learning_rate": 2.2800871772395995e-07, "loss": 0.5273, "step": 4332 }, { "epoch": 0.6990964827363666, "grad_norm": 1.8349179297863742, "learning_rate": 2.2778490289559328e-07, "loss": 0.4303, "step": 4333 }, { "epoch": 0.6992578251048726, "grad_norm": 1.3657341780328254, "learning_rate": 2.275611655627006e-07, "loss": 0.4703, "step": 4334 }, { "epoch": 0.6994191674733785, "grad_norm": 1.671032702314892, "learning_rate": 2.2733750578897686e-07, "loss": 0.5453, "step": 4335 }, { "epoch": 0.6995805098418845, "grad_norm": 1.8956532095832872, "learning_rate": 2.271139236380943e-07, "loss": 0.4976, "step": 4336 }, { "epoch": 0.6997418522103904, "grad_norm": 1.560347256646761, "learning_rate": 2.2689041917370343e-07, "loss": 0.4127, "step": 4337 }, { "epoch": 0.6999031945788964, "grad_norm": 1.2550154204010149, "learning_rate": 2.2666699245943276e-07, "loss": 0.4419, "step": 4338 }, { "epoch": 0.7000645369474023, "grad_norm": 1.7035967672153889, "learning_rate": 2.2644364355888813e-07, "loss": 0.41, "step": 4339 }, { "epoch": 0.7002258793159084, "grad_norm": 1.8077926674219031, "learning_rate": 2.2622037253565363e-07, "loss": 0.513, "step": 4340 }, { "epoch": 0.7003872216844144, "grad_norm": 1.3605652782981859, "learning_rate": 2.259971794532912e-07, "loss": 0.401, "step": 4341 }, { "epoch": 0.7005485640529203, "grad_norm": 1.4181877366467115, "learning_rate": 2.257740643753405e-07, "loss": 0.5478, "step": 4342 }, { "epoch": 0.7007099064214263, "grad_norm": 1.3296567722808954, "learning_rate": 2.2555102736531917e-07, "loss": 0.5259, "step": 4343 }, { "epoch": 0.7008712487899322, "grad_norm": 1.5862347554465563, "learning_rate": 2.2532806848672205e-07, "loss": 0.4017, "step": 4344 }, { "epoch": 0.7010325911584382, "grad_norm": 1.393243617282925, "learning_rate": 2.2510518780302225e-07, "loss": 0.3954, "step": 4345 }, { "epoch": 0.7011939335269441, "grad_norm": 1.2549008616771045, "learning_rate": 2.248823853776707e-07, "loss": 0.3964, "step": 4346 }, { "epoch": 0.7013552758954501, "grad_norm": 1.1665679799075601, "learning_rate": 2.2465966127409546e-07, "loss": 0.5135, "step": 4347 }, { "epoch": 0.7015166182639561, "grad_norm": 1.7705844330238965, "learning_rate": 2.2443701555570315e-07, "loss": 0.5993, "step": 4348 }, { "epoch": 0.7016779606324621, "grad_norm": 1.7530042882149839, "learning_rate": 2.2421444828587715e-07, "loss": 0.4726, "step": 4349 }, { "epoch": 0.7018393030009681, "grad_norm": 1.5263014258766374, "learning_rate": 2.239919595279791e-07, "loss": 0.4306, "step": 4350 }, { "epoch": 0.702000645369474, "grad_norm": 1.7300992618595157, "learning_rate": 2.237695493453484e-07, "loss": 0.4413, "step": 4351 }, { "epoch": 0.70216198773798, "grad_norm": 1.5912715115108256, "learning_rate": 2.2354721780130148e-07, "loss": 0.5827, "step": 4352 }, { "epoch": 0.702323330106486, "grad_norm": 1.474305542218989, "learning_rate": 2.2332496495913289e-07, "loss": 0.496, "step": 4353 }, { "epoch": 0.7024846724749919, "grad_norm": 1.6816016357722434, "learning_rate": 2.2310279088211453e-07, "loss": 0.4029, "step": 4354 }, { "epoch": 0.7026460148434979, "grad_norm": 1.3243453153818225, "learning_rate": 2.2288069563349605e-07, "loss": 0.4218, "step": 4355 }, { "epoch": 0.7028073572120038, "grad_norm": 1.3585289701139447, "learning_rate": 2.2265867927650473e-07, "loss": 0.5001, "step": 4356 }, { "epoch": 0.7029686995805099, "grad_norm": 1.1503452881459262, "learning_rate": 2.2243674187434485e-07, "loss": 0.5285, "step": 4357 }, { "epoch": 0.7031300419490158, "grad_norm": 2.0649869548025332, "learning_rate": 2.2221488349019902e-07, "loss": 0.5814, "step": 4358 }, { "epoch": 0.7032913843175218, "grad_norm": 1.0738025205688109, "learning_rate": 2.219931041872265e-07, "loss": 0.3951, "step": 4359 }, { "epoch": 0.7034527266860278, "grad_norm": 1.1628912851718083, "learning_rate": 2.2177140402856464e-07, "loss": 0.3337, "step": 4360 }, { "epoch": 0.7036140690545337, "grad_norm": 1.7233378128175543, "learning_rate": 2.2154978307732831e-07, "loss": 0.4131, "step": 4361 }, { "epoch": 0.7037754114230397, "grad_norm": 1.982523677824034, "learning_rate": 2.213282413966092e-07, "loss": 0.4689, "step": 4362 }, { "epoch": 0.7039367537915456, "grad_norm": 2.280494878500967, "learning_rate": 2.2110677904947706e-07, "loss": 0.6273, "step": 4363 }, { "epoch": 0.7040980961600516, "grad_norm": 1.487446067537579, "learning_rate": 2.2088539609897883e-07, "loss": 0.4744, "step": 4364 }, { "epoch": 0.7042594385285575, "grad_norm": 1.7451664384532044, "learning_rate": 2.2066409260813878e-07, "loss": 0.4767, "step": 4365 }, { "epoch": 0.7044207808970636, "grad_norm": 1.1728690953655427, "learning_rate": 2.2044286863995892e-07, "loss": 0.4488, "step": 4366 }, { "epoch": 0.7045821232655696, "grad_norm": 1.988264881958505, "learning_rate": 2.2022172425741792e-07, "loss": 0.5468, "step": 4367 }, { "epoch": 0.7047434656340755, "grad_norm": 1.6738309025602318, "learning_rate": 2.2000065952347236e-07, "loss": 0.4934, "step": 4368 }, { "epoch": 0.7049048080025815, "grad_norm": 1.3053931423827112, "learning_rate": 2.1977967450105615e-07, "loss": 0.5632, "step": 4369 }, { "epoch": 0.7050661503710874, "grad_norm": 1.4558114271402776, "learning_rate": 2.1955876925308002e-07, "loss": 0.3011, "step": 4370 }, { "epoch": 0.7052274927395934, "grad_norm": 1.2543258202704817, "learning_rate": 2.1933794384243277e-07, "loss": 0.3787, "step": 4371 }, { "epoch": 0.7053888351080994, "grad_norm": 1.9872381025777133, "learning_rate": 2.1911719833197944e-07, "loss": 0.4427, "step": 4372 }, { "epoch": 0.7055501774766053, "grad_norm": 1.6516022156818557, "learning_rate": 2.1889653278456326e-07, "loss": 0.4663, "step": 4373 }, { "epoch": 0.7057115198451114, "grad_norm": 1.191793310109436, "learning_rate": 2.1867594726300448e-07, "loss": 0.3541, "step": 4374 }, { "epoch": 0.7058728622136173, "grad_norm": 1.3339317389585756, "learning_rate": 2.184554418301e-07, "loss": 0.5311, "step": 4375 }, { "epoch": 0.7060342045821233, "grad_norm": 1.5682844284221522, "learning_rate": 2.182350165486246e-07, "loss": 0.5176, "step": 4376 }, { "epoch": 0.7061955469506292, "grad_norm": 1.8547029541194486, "learning_rate": 2.180146714813299e-07, "loss": 0.4368, "step": 4377 }, { "epoch": 0.7063568893191352, "grad_norm": 1.3254863332319409, "learning_rate": 2.1779440669094485e-07, "loss": 0.5835, "step": 4378 }, { "epoch": 0.7065182316876412, "grad_norm": 1.1017842171109682, "learning_rate": 2.1757422224017558e-07, "loss": 0.4065, "step": 4379 }, { "epoch": 0.7066795740561471, "grad_norm": 2.000756478295926, "learning_rate": 2.1735411819170495e-07, "loss": 0.6671, "step": 4380 }, { "epoch": 0.7068409164246531, "grad_norm": 1.526566798068412, "learning_rate": 2.171340946081934e-07, "loss": 0.5337, "step": 4381 }, { "epoch": 0.707002258793159, "grad_norm": 1.709013113597649, "learning_rate": 2.1691415155227832e-07, "loss": 0.4059, "step": 4382 }, { "epoch": 0.7071636011616651, "grad_norm": 1.3419692638106553, "learning_rate": 2.1669428908657395e-07, "loss": 0.3656, "step": 4383 }, { "epoch": 0.7073249435301711, "grad_norm": 1.3777807900095043, "learning_rate": 2.16474507273672e-07, "loss": 0.2649, "step": 4384 }, { "epoch": 0.707486285898677, "grad_norm": 1.7163022037132112, "learning_rate": 2.1625480617614067e-07, "loss": 0.3276, "step": 4385 }, { "epoch": 0.707647628267183, "grad_norm": 1.3526161364688074, "learning_rate": 2.1603518585652575e-07, "loss": 0.4508, "step": 4386 }, { "epoch": 0.7078089706356889, "grad_norm": 1.431391633201367, "learning_rate": 2.1581564637734973e-07, "loss": 0.3855, "step": 4387 }, { "epoch": 0.7079703130041949, "grad_norm": 1.9192958366045587, "learning_rate": 2.155961878011121e-07, "loss": 0.4852, "step": 4388 }, { "epoch": 0.7081316553727008, "grad_norm": 1.7899675370581232, "learning_rate": 2.153768101902896e-07, "loss": 0.5995, "step": 4389 }, { "epoch": 0.7082929977412068, "grad_norm": 1.7261138609309141, "learning_rate": 2.1515751360733526e-07, "loss": 0.4802, "step": 4390 }, { "epoch": 0.7084543401097129, "grad_norm": 1.4055978322403746, "learning_rate": 2.1493829811467972e-07, "loss": 0.4364, "step": 4391 }, { "epoch": 0.7086156824782188, "grad_norm": 1.854243787668948, "learning_rate": 2.1471916377473041e-07, "loss": 0.5866, "step": 4392 }, { "epoch": 0.7087770248467248, "grad_norm": 1.9003956433001574, "learning_rate": 2.1450011064987118e-07, "loss": 0.5355, "step": 4393 }, { "epoch": 0.7089383672152307, "grad_norm": 1.2138169048817289, "learning_rate": 2.142811388024633e-07, "loss": 0.3468, "step": 4394 }, { "epoch": 0.7090997095837367, "grad_norm": 1.3609792516809962, "learning_rate": 2.140622482948448e-07, "loss": 0.3443, "step": 4395 }, { "epoch": 0.7092610519522426, "grad_norm": 1.4364563014085472, "learning_rate": 2.1384343918933023e-07, "loss": 0.5703, "step": 4396 }, { "epoch": 0.7094223943207486, "grad_norm": 1.3996903332466328, "learning_rate": 2.136247115482115e-07, "loss": 0.6176, "step": 4397 }, { "epoch": 0.7095837366892546, "grad_norm": 1.8338780966485504, "learning_rate": 2.1340606543375662e-07, "loss": 0.6849, "step": 4398 }, { "epoch": 0.7097450790577605, "grad_norm": 1.3297526091470955, "learning_rate": 2.131875009082108e-07, "loss": 0.4275, "step": 4399 }, { "epoch": 0.7099064214262666, "grad_norm": 2.0175913600727564, "learning_rate": 2.1296901803379658e-07, "loss": 0.5987, "step": 4400 }, { "epoch": 0.7100677637947725, "grad_norm": 2.050355587334885, "learning_rate": 2.1275061687271216e-07, "loss": 0.594, "step": 4401 }, { "epoch": 0.7102291061632785, "grad_norm": 1.7707450695106535, "learning_rate": 2.1253229748713342e-07, "loss": 0.5405, "step": 4402 }, { "epoch": 0.7103904485317845, "grad_norm": 1.1269000221051362, "learning_rate": 2.1231405993921208e-07, "loss": 0.3507, "step": 4403 }, { "epoch": 0.7105517909002904, "grad_norm": 1.2316777178656133, "learning_rate": 2.1209590429107733e-07, "loss": 0.4474, "step": 4404 }, { "epoch": 0.7107131332687964, "grad_norm": 1.1642315121018916, "learning_rate": 2.1187783060483484e-07, "loss": 0.4552, "step": 4405 }, { "epoch": 0.7108744756373023, "grad_norm": 2.1728095369882516, "learning_rate": 2.1165983894256646e-07, "loss": 0.6007, "step": 4406 }, { "epoch": 0.7110358180058083, "grad_norm": 1.5487965680403557, "learning_rate": 2.1144192936633138e-07, "loss": 0.5071, "step": 4407 }, { "epoch": 0.7111971603743142, "grad_norm": 1.3372545052661267, "learning_rate": 2.112241019381652e-07, "loss": 0.3962, "step": 4408 }, { "epoch": 0.7113585027428203, "grad_norm": 1.4496545676133408, "learning_rate": 2.1100635672007978e-07, "loss": 0.422, "step": 4409 }, { "epoch": 0.7115198451113263, "grad_norm": 1.4743129002719624, "learning_rate": 2.1078869377406394e-07, "loss": 0.4094, "step": 4410 }, { "epoch": 0.7116811874798322, "grad_norm": 2.7004359988231963, "learning_rate": 2.1057111316208304e-07, "loss": 0.5858, "step": 4411 }, { "epoch": 0.7118425298483382, "grad_norm": 1.561937917699144, "learning_rate": 2.10353614946079e-07, "loss": 0.4929, "step": 4412 }, { "epoch": 0.7120038722168441, "grad_norm": 1.8184504993195008, "learning_rate": 2.1013619918797033e-07, "loss": 0.5423, "step": 4413 }, { "epoch": 0.7121652145853501, "grad_norm": 1.8136277472151638, "learning_rate": 2.0991886594965168e-07, "loss": 0.5649, "step": 4414 }, { "epoch": 0.712326556953856, "grad_norm": 1.1456705613553357, "learning_rate": 2.097016152929948e-07, "loss": 0.4477, "step": 4415 }, { "epoch": 0.712487899322362, "grad_norm": 1.3549544708232248, "learning_rate": 2.0948444727984733e-07, "loss": 0.4187, "step": 4416 }, { "epoch": 0.7126492416908681, "grad_norm": 1.4088230752481607, "learning_rate": 2.092673619720338e-07, "loss": 0.488, "step": 4417 }, { "epoch": 0.712810584059374, "grad_norm": 1.7547477623500924, "learning_rate": 2.090503594313553e-07, "loss": 0.5561, "step": 4418 }, { "epoch": 0.71297192642788, "grad_norm": 1.6714951983995363, "learning_rate": 2.0883343971958872e-07, "loss": 0.4995, "step": 4419 }, { "epoch": 0.7131332687963859, "grad_norm": 1.6110558482079143, "learning_rate": 2.0861660289848803e-07, "loss": 0.3672, "step": 4420 }, { "epoch": 0.7132946111648919, "grad_norm": 1.606927733177364, "learning_rate": 2.0839984902978353e-07, "loss": 0.3082, "step": 4421 }, { "epoch": 0.7134559535333979, "grad_norm": 1.7481210021599618, "learning_rate": 2.0818317817518117e-07, "loss": 0.5957, "step": 4422 }, { "epoch": 0.7136172959019038, "grad_norm": 1.588207330592615, "learning_rate": 2.0796659039636448e-07, "loss": 0.4947, "step": 4423 }, { "epoch": 0.7137786382704098, "grad_norm": 1.557354809896014, "learning_rate": 2.0775008575499226e-07, "loss": 0.5349, "step": 4424 }, { "epoch": 0.7139399806389157, "grad_norm": 1.7847181320968184, "learning_rate": 2.075336643127002e-07, "loss": 0.4944, "step": 4425 }, { "epoch": 0.7141013230074218, "grad_norm": 1.9164461202474552, "learning_rate": 2.0731732613110037e-07, "loss": 0.4991, "step": 4426 }, { "epoch": 0.7142626653759278, "grad_norm": 1.4452522883889531, "learning_rate": 2.0710107127178057e-07, "loss": 0.5418, "step": 4427 }, { "epoch": 0.7144240077444337, "grad_norm": 1.4058700048114217, "learning_rate": 2.0688489979630557e-07, "loss": 0.3206, "step": 4428 }, { "epoch": 0.7145853501129397, "grad_norm": 1.6618454182299258, "learning_rate": 2.066688117662158e-07, "loss": 0.5144, "step": 4429 }, { "epoch": 0.7147466924814456, "grad_norm": 1.6944971189319638, "learning_rate": 2.0645280724302833e-07, "loss": 0.4826, "step": 4430 }, { "epoch": 0.7149080348499516, "grad_norm": 1.5836655452282424, "learning_rate": 2.062368862882366e-07, "loss": 0.463, "step": 4431 }, { "epoch": 0.7150693772184575, "grad_norm": 1.6647675502600734, "learning_rate": 2.0602104896330958e-07, "loss": 0.3723, "step": 4432 }, { "epoch": 0.7152307195869635, "grad_norm": 1.3804617691079417, "learning_rate": 2.0580529532969304e-07, "loss": 0.4103, "step": 4433 }, { "epoch": 0.7153920619554696, "grad_norm": 1.704828792489275, "learning_rate": 2.055896254488087e-07, "loss": 0.577, "step": 4434 }, { "epoch": 0.7155534043239755, "grad_norm": 1.8103580973127888, "learning_rate": 2.0537403938205461e-07, "loss": 0.622, "step": 4435 }, { "epoch": 0.7157147466924815, "grad_norm": 1.6560211023469649, "learning_rate": 2.0515853719080484e-07, "loss": 0.4238, "step": 4436 }, { "epoch": 0.7158760890609874, "grad_norm": 1.4577418989971431, "learning_rate": 2.0494311893640936e-07, "loss": 0.5308, "step": 4437 }, { "epoch": 0.7160374314294934, "grad_norm": 1.380768380411073, "learning_rate": 2.0472778468019452e-07, "loss": 0.4293, "step": 4438 }, { "epoch": 0.7161987737979993, "grad_norm": 1.5710091634977255, "learning_rate": 2.0451253448346296e-07, "loss": 0.5023, "step": 4439 }, { "epoch": 0.7163601161665053, "grad_norm": 2.057438506346535, "learning_rate": 2.0429736840749263e-07, "loss": 0.6273, "step": 4440 }, { "epoch": 0.7165214585350113, "grad_norm": 1.9685611331507544, "learning_rate": 2.040822865135385e-07, "loss": 0.4467, "step": 4441 }, { "epoch": 0.7166828009035172, "grad_norm": 1.862832901152918, "learning_rate": 2.0386728886283066e-07, "loss": 0.5494, "step": 4442 }, { "epoch": 0.7168441432720233, "grad_norm": 1.3988033221106368, "learning_rate": 2.0365237551657595e-07, "loss": 0.4919, "step": 4443 }, { "epoch": 0.7170054856405292, "grad_norm": 1.6716558722809611, "learning_rate": 2.0343754653595691e-07, "loss": 0.5361, "step": 4444 }, { "epoch": 0.7171668280090352, "grad_norm": 1.582482318387679, "learning_rate": 2.0322280198213164e-07, "loss": 0.4255, "step": 4445 }, { "epoch": 0.7173281703775412, "grad_norm": 1.9160507152272233, "learning_rate": 2.0300814191623538e-07, "loss": 0.658, "step": 4446 }, { "epoch": 0.7174895127460471, "grad_norm": 1.5608165064018558, "learning_rate": 2.0279356639937794e-07, "loss": 0.4404, "step": 4447 }, { "epoch": 0.7176508551145531, "grad_norm": 1.3565586775994114, "learning_rate": 2.0257907549264597e-07, "loss": 0.4052, "step": 4448 }, { "epoch": 0.717812197483059, "grad_norm": 1.4483370913832487, "learning_rate": 2.0236466925710178e-07, "loss": 0.4463, "step": 4449 }, { "epoch": 0.717973539851565, "grad_norm": 1.7992439907620803, "learning_rate": 2.021503477537833e-07, "loss": 0.47, "step": 4450 }, { "epoch": 0.7181348822200709, "grad_norm": 1.4070016759362234, "learning_rate": 2.0193611104370478e-07, "loss": 0.3506, "step": 4451 }, { "epoch": 0.718296224588577, "grad_norm": 1.6244110360575466, "learning_rate": 2.0172195918785623e-07, "loss": 0.4726, "step": 4452 }, { "epoch": 0.718457566957083, "grad_norm": 1.4650049553224258, "learning_rate": 2.015078922472031e-07, "loss": 0.3193, "step": 4453 }, { "epoch": 0.7186189093255889, "grad_norm": 1.4781931597295779, "learning_rate": 2.0129391028268734e-07, "loss": 0.4681, "step": 4454 }, { "epoch": 0.7187802516940949, "grad_norm": 1.271818524355715, "learning_rate": 2.010800133552259e-07, "loss": 0.4228, "step": 4455 }, { "epoch": 0.7189415940626008, "grad_norm": 1.7120277031947666, "learning_rate": 2.0086620152571228e-07, "loss": 0.6694, "step": 4456 }, { "epoch": 0.7191029364311068, "grad_norm": 1.7521377177043955, "learning_rate": 2.006524748550153e-07, "loss": 0.6059, "step": 4457 }, { "epoch": 0.7192642787996127, "grad_norm": 1.5575970591616775, "learning_rate": 2.0043883340397978e-07, "loss": 0.5179, "step": 4458 }, { "epoch": 0.7194256211681187, "grad_norm": 1.401285776571453, "learning_rate": 2.002252772334263e-07, "loss": 0.4532, "step": 4459 }, { "epoch": 0.7195869635366248, "grad_norm": 1.7409941002857001, "learning_rate": 2.0001180640415072e-07, "loss": 0.4587, "step": 4460 }, { "epoch": 0.7197483059051307, "grad_norm": 1.7470194775609735, "learning_rate": 1.9979842097692501e-07, "loss": 0.4152, "step": 4461 }, { "epoch": 0.7199096482736367, "grad_norm": 1.1576032709476207, "learning_rate": 1.9958512101249703e-07, "loss": 0.4081, "step": 4462 }, { "epoch": 0.7200709906421426, "grad_norm": 1.9922285487115965, "learning_rate": 1.9937190657158953e-07, "loss": 0.5217, "step": 4463 }, { "epoch": 0.7202323330106486, "grad_norm": 1.275023192774442, "learning_rate": 1.9915877771490169e-07, "loss": 0.4542, "step": 4464 }, { "epoch": 0.7203936753791546, "grad_norm": 1.4865514760373464, "learning_rate": 1.989457345031082e-07, "loss": 0.3506, "step": 4465 }, { "epoch": 0.7205550177476605, "grad_norm": 1.1705298778300721, "learning_rate": 1.9873277699685875e-07, "loss": 0.359, "step": 4466 }, { "epoch": 0.7207163601161665, "grad_norm": 1.3019040963469237, "learning_rate": 1.9851990525677958e-07, "loss": 0.5716, "step": 4467 }, { "epoch": 0.7208777024846724, "grad_norm": 1.5822509613058298, "learning_rate": 1.983071193434714e-07, "loss": 0.3705, "step": 4468 }, { "epoch": 0.7210390448531785, "grad_norm": 1.6564246751336082, "learning_rate": 1.9809441931751182e-07, "loss": 0.3878, "step": 4469 }, { "epoch": 0.7212003872216844, "grad_norm": 1.5878310151307298, "learning_rate": 1.9788180523945275e-07, "loss": 0.4803, "step": 4470 }, { "epoch": 0.7213617295901904, "grad_norm": 1.5993572404359164, "learning_rate": 1.9766927716982234e-07, "loss": 0.5562, "step": 4471 }, { "epoch": 0.7215230719586964, "grad_norm": 1.4803538416250976, "learning_rate": 1.9745683516912432e-07, "loss": 0.4353, "step": 4472 }, { "epoch": 0.7216844143272023, "grad_norm": 1.1836181738823102, "learning_rate": 1.9724447929783728e-07, "loss": 0.4792, "step": 4473 }, { "epoch": 0.7218457566957083, "grad_norm": 1.6117822365546532, "learning_rate": 1.9703220961641582e-07, "loss": 0.3716, "step": 4474 }, { "epoch": 0.7220070990642142, "grad_norm": 1.8028629211685547, "learning_rate": 1.9682002618529015e-07, "loss": 0.4857, "step": 4475 }, { "epoch": 0.7221684414327202, "grad_norm": 1.4518703663916674, "learning_rate": 1.9660792906486516e-07, "loss": 0.5341, "step": 4476 }, { "epoch": 0.7223297838012263, "grad_norm": 1.2758398039089165, "learning_rate": 1.963959183155221e-07, "loss": 0.4151, "step": 4477 }, { "epoch": 0.7224911261697322, "grad_norm": 1.5459381503259368, "learning_rate": 1.9618399399761688e-07, "loss": 0.5323, "step": 4478 }, { "epoch": 0.7226524685382382, "grad_norm": 1.5809521420960675, "learning_rate": 1.9597215617148099e-07, "loss": 0.3225, "step": 4479 }, { "epoch": 0.7228138109067441, "grad_norm": 1.5661874366193038, "learning_rate": 1.9576040489742202e-07, "loss": 0.5414, "step": 4480 }, { "epoch": 0.7229751532752501, "grad_norm": 1.7078206138034184, "learning_rate": 1.9554874023572176e-07, "loss": 0.5386, "step": 4481 }, { "epoch": 0.723136495643756, "grad_norm": 2.173170512932105, "learning_rate": 1.9533716224663832e-07, "loss": 0.7553, "step": 4482 }, { "epoch": 0.723297838012262, "grad_norm": 2.251438740512989, "learning_rate": 1.9512567099040428e-07, "loss": 0.8581, "step": 4483 }, { "epoch": 0.723459180380768, "grad_norm": 2.053172898568461, "learning_rate": 1.949142665272282e-07, "loss": 0.6071, "step": 4484 }, { "epoch": 0.7236205227492739, "grad_norm": 1.3172214598706902, "learning_rate": 1.9470294891729382e-07, "loss": 0.4297, "step": 4485 }, { "epoch": 0.72378186511778, "grad_norm": 2.6328225517647046, "learning_rate": 1.944917182207597e-07, "loss": 0.5858, "step": 4486 }, { "epoch": 0.7239432074862859, "grad_norm": 1.5446595790345359, "learning_rate": 1.942805744977602e-07, "loss": 0.544, "step": 4487 }, { "epoch": 0.7241045498547919, "grad_norm": 1.6234303008902953, "learning_rate": 1.9406951780840487e-07, "loss": 0.4065, "step": 4488 }, { "epoch": 0.7242658922232978, "grad_norm": 1.6451087354533267, "learning_rate": 1.9385854821277798e-07, "loss": 0.374, "step": 4489 }, { "epoch": 0.7244272345918038, "grad_norm": 1.8481750405135324, "learning_rate": 1.936476657709396e-07, "loss": 0.489, "step": 4490 }, { "epoch": 0.7245885769603098, "grad_norm": 1.1728607101147221, "learning_rate": 1.9343687054292434e-07, "loss": 0.337, "step": 4491 }, { "epoch": 0.7247499193288157, "grad_norm": 1.7425327386612846, "learning_rate": 1.9322616258874281e-07, "loss": 0.426, "step": 4492 }, { "epoch": 0.7249112616973217, "grad_norm": 1.4403201924749913, "learning_rate": 1.9301554196838038e-07, "loss": 0.4509, "step": 4493 }, { "epoch": 0.7250726040658277, "grad_norm": 1.6659198120758165, "learning_rate": 1.9280500874179722e-07, "loss": 0.4424, "step": 4494 }, { "epoch": 0.7252339464343337, "grad_norm": 1.852373420416567, "learning_rate": 1.9259456296892918e-07, "loss": 0.5094, "step": 4495 }, { "epoch": 0.7253952888028397, "grad_norm": 1.6701806297382205, "learning_rate": 1.9238420470968664e-07, "loss": 0.4414, "step": 4496 }, { "epoch": 0.7255566311713456, "grad_norm": 1.4973688891654182, "learning_rate": 1.9217393402395553e-07, "loss": 0.625, "step": 4497 }, { "epoch": 0.7257179735398516, "grad_norm": 1.1910298237465642, "learning_rate": 1.919637509715969e-07, "loss": 0.5057, "step": 4498 }, { "epoch": 0.7258793159083575, "grad_norm": 1.4261044488670418, "learning_rate": 1.9175365561244633e-07, "loss": 0.4924, "step": 4499 }, { "epoch": 0.7260406582768635, "grad_norm": 1.3607790043333041, "learning_rate": 1.9154364800631485e-07, "loss": 0.5293, "step": 4500 }, { "epoch": 0.7262020006453694, "grad_norm": 1.5886985159394906, "learning_rate": 1.9133372821298866e-07, "loss": 0.5365, "step": 4501 }, { "epoch": 0.7263633430138754, "grad_norm": 1.6100797128787538, "learning_rate": 1.911238962922282e-07, "loss": 0.3709, "step": 4502 }, { "epoch": 0.7265246853823815, "grad_norm": 1.4353725612971469, "learning_rate": 1.9091415230377e-07, "loss": 0.462, "step": 4503 }, { "epoch": 0.7266860277508874, "grad_norm": 1.4286246596364414, "learning_rate": 1.9070449630732455e-07, "loss": 0.5676, "step": 4504 }, { "epoch": 0.7268473701193934, "grad_norm": 1.8614959160828857, "learning_rate": 1.904949283625778e-07, "loss": 0.567, "step": 4505 }, { "epoch": 0.7270087124878993, "grad_norm": 1.528794870037812, "learning_rate": 1.9028544852919082e-07, "loss": 0.5527, "step": 4506 }, { "epoch": 0.7271700548564053, "grad_norm": 1.6917073111826952, "learning_rate": 1.9007605686679884e-07, "loss": 0.5251, "step": 4507 }, { "epoch": 0.7273313972249112, "grad_norm": 1.2419358484405891, "learning_rate": 1.898667534350129e-07, "loss": 0.4008, "step": 4508 }, { "epoch": 0.7274927395934172, "grad_norm": 1.3992701347964809, "learning_rate": 1.8965753829341807e-07, "loss": 0.4118, "step": 4509 }, { "epoch": 0.7276540819619232, "grad_norm": 1.8634561580559994, "learning_rate": 1.8944841150157498e-07, "loss": 0.4442, "step": 4510 }, { "epoch": 0.7278154243304291, "grad_norm": 1.4179859002707675, "learning_rate": 1.892393731190189e-07, "loss": 0.465, "step": 4511 }, { "epoch": 0.7279767666989352, "grad_norm": 1.9305298237111068, "learning_rate": 1.8903042320525953e-07, "loss": 0.4598, "step": 4512 }, { "epoch": 0.7281381090674411, "grad_norm": 1.5925140824586073, "learning_rate": 1.8882156181978193e-07, "loss": 0.487, "step": 4513 }, { "epoch": 0.7282994514359471, "grad_norm": 1.5100986365129567, "learning_rate": 1.886127890220457e-07, "loss": 0.5304, "step": 4514 }, { "epoch": 0.728460793804453, "grad_norm": 1.7917612701397956, "learning_rate": 1.8840410487148528e-07, "loss": 0.4123, "step": 4515 }, { "epoch": 0.728622136172959, "grad_norm": 1.5553053965070038, "learning_rate": 1.8819550942751007e-07, "loss": 0.5488, "step": 4516 }, { "epoch": 0.728783478541465, "grad_norm": 1.8391032095319362, "learning_rate": 1.8798700274950352e-07, "loss": 0.5344, "step": 4517 }, { "epoch": 0.7289448209099709, "grad_norm": 1.4481534512704493, "learning_rate": 1.8777858489682464e-07, "loss": 0.3645, "step": 4518 }, { "epoch": 0.7291061632784769, "grad_norm": 1.2723365658502555, "learning_rate": 1.875702559288069e-07, "loss": 0.3599, "step": 4519 }, { "epoch": 0.729267505646983, "grad_norm": 1.9413775129129427, "learning_rate": 1.8736201590475799e-07, "loss": 0.4498, "step": 4520 }, { "epoch": 0.7294288480154889, "grad_norm": 1.4968608752004344, "learning_rate": 1.87153864883961e-07, "loss": 0.4304, "step": 4521 }, { "epoch": 0.7295901903839949, "grad_norm": 0.9872068815149134, "learning_rate": 1.8694580292567308e-07, "loss": 0.3046, "step": 4522 }, { "epoch": 0.7297515327525008, "grad_norm": 1.7002538575637822, "learning_rate": 1.867378300891264e-07, "loss": 0.5343, "step": 4523 }, { "epoch": 0.7299128751210068, "grad_norm": 1.6372256711677498, "learning_rate": 1.8652994643352783e-07, "loss": 0.5387, "step": 4524 }, { "epoch": 0.7300742174895127, "grad_norm": 2.522731623227979, "learning_rate": 1.8632215201805812e-07, "loss": 0.7081, "step": 4525 }, { "epoch": 0.7302355598580187, "grad_norm": 1.717894046229582, "learning_rate": 1.8611444690187388e-07, "loss": 0.4446, "step": 4526 }, { "epoch": 0.7303969022265246, "grad_norm": 1.925556409605355, "learning_rate": 1.859068311441051e-07, "loss": 0.5693, "step": 4527 }, { "epoch": 0.7305582445950306, "grad_norm": 1.6976426806480323, "learning_rate": 1.8569930480385697e-07, "loss": 0.397, "step": 4528 }, { "epoch": 0.7307195869635367, "grad_norm": 1.4280333408182049, "learning_rate": 1.8549186794020916e-07, "loss": 0.3956, "step": 4529 }, { "epoch": 0.7308809293320426, "grad_norm": 1.1262257044772295, "learning_rate": 1.852845206122155e-07, "loss": 0.4464, "step": 4530 }, { "epoch": 0.7310422717005486, "grad_norm": 1.151671287750012, "learning_rate": 1.8507726287890474e-07, "loss": 0.2923, "step": 4531 }, { "epoch": 0.7312036140690545, "grad_norm": 1.1377286848813808, "learning_rate": 1.8487009479928018e-07, "loss": 0.5008, "step": 4532 }, { "epoch": 0.7313649564375605, "grad_norm": 1.8457611236340414, "learning_rate": 1.8466301643231907e-07, "loss": 0.5393, "step": 4533 }, { "epoch": 0.7315262988060665, "grad_norm": 1.9051158845752372, "learning_rate": 1.8445602783697373e-07, "loss": 0.5898, "step": 4534 }, { "epoch": 0.7316876411745724, "grad_norm": 1.2801471243437277, "learning_rate": 1.8424912907217037e-07, "loss": 0.2258, "step": 4535 }, { "epoch": 0.7318489835430784, "grad_norm": 2.2326340728860754, "learning_rate": 1.840423201968101e-07, "loss": 0.5741, "step": 4536 }, { "epoch": 0.7320103259115844, "grad_norm": 1.8797635919987916, "learning_rate": 1.8383560126976815e-07, "loss": 0.5314, "step": 4537 }, { "epoch": 0.7321716682800904, "grad_norm": 1.8475515969056886, "learning_rate": 1.8362897234989432e-07, "loss": 0.5667, "step": 4538 }, { "epoch": 0.7323330106485963, "grad_norm": 1.7529658661830612, "learning_rate": 1.8342243349601283e-07, "loss": 0.493, "step": 4539 }, { "epoch": 0.7324943530171023, "grad_norm": 1.460213548118774, "learning_rate": 1.832159847669218e-07, "loss": 0.503, "step": 4540 }, { "epoch": 0.7326556953856083, "grad_norm": 1.6788821396620857, "learning_rate": 1.830096262213942e-07, "loss": 0.5526, "step": 4541 }, { "epoch": 0.7328170377541142, "grad_norm": 1.4549861187616824, "learning_rate": 1.828033579181773e-07, "loss": 0.364, "step": 4542 }, { "epoch": 0.7329783801226202, "grad_norm": 2.0436059603547623, "learning_rate": 1.8259717991599227e-07, "loss": 0.6269, "step": 4543 }, { "epoch": 0.7331397224911261, "grad_norm": 1.607160062546443, "learning_rate": 1.823910922735349e-07, "loss": 0.4939, "step": 4544 }, { "epoch": 0.7333010648596321, "grad_norm": 1.4571696770494513, "learning_rate": 1.8218509504947537e-07, "loss": 0.3319, "step": 4545 }, { "epoch": 0.7334624072281382, "grad_norm": 1.8039262495423258, "learning_rate": 1.8197918830245766e-07, "loss": 0.6984, "step": 4546 }, { "epoch": 0.7336237495966441, "grad_norm": 1.4734388948112351, "learning_rate": 1.817733720911006e-07, "loss": 0.6087, "step": 4547 }, { "epoch": 0.7337850919651501, "grad_norm": 1.8600257568216734, "learning_rate": 1.8156764647399637e-07, "loss": 0.5891, "step": 4548 }, { "epoch": 0.733946434333656, "grad_norm": 1.3657306429507041, "learning_rate": 1.8136201150971258e-07, "loss": 0.4647, "step": 4549 }, { "epoch": 0.734107776702162, "grad_norm": 1.568719919835803, "learning_rate": 1.8115646725678996e-07, "loss": 0.406, "step": 4550 }, { "epoch": 0.7342691190706679, "grad_norm": 1.454222316096118, "learning_rate": 1.8095101377374383e-07, "loss": 0.4734, "step": 4551 }, { "epoch": 0.7344304614391739, "grad_norm": 1.3954140996733768, "learning_rate": 1.8074565111906393e-07, "loss": 0.3233, "step": 4552 }, { "epoch": 0.7345918038076799, "grad_norm": 1.7811505473966693, "learning_rate": 1.8054037935121342e-07, "loss": 0.4976, "step": 4553 }, { "epoch": 0.7347531461761858, "grad_norm": 1.4076215201679452, "learning_rate": 1.8033519852863033e-07, "loss": 0.3981, "step": 4554 }, { "epoch": 0.7349144885446919, "grad_norm": 1.7771676783280528, "learning_rate": 1.801301087097266e-07, "loss": 0.5381, "step": 4555 }, { "epoch": 0.7350758309131978, "grad_norm": 1.2428124732961596, "learning_rate": 1.7992510995288784e-07, "loss": 0.3993, "step": 4556 }, { "epoch": 0.7352371732817038, "grad_norm": 1.4402890721884458, "learning_rate": 1.7972020231647424e-07, "loss": 0.4138, "step": 4557 }, { "epoch": 0.7353985156502097, "grad_norm": 1.6790855571129986, "learning_rate": 1.7951538585882004e-07, "loss": 0.3854, "step": 4558 }, { "epoch": 0.7355598580187157, "grad_norm": 1.5811661980779295, "learning_rate": 1.7931066063823297e-07, "loss": 0.4111, "step": 4559 }, { "epoch": 0.7357212003872217, "grad_norm": 1.2984035629813524, "learning_rate": 1.7910602671299536e-07, "loss": 0.3256, "step": 4560 }, { "epoch": 0.7358825427557276, "grad_norm": 1.7442859277293274, "learning_rate": 1.7890148414136334e-07, "loss": 0.5033, "step": 4561 }, { "epoch": 0.7360438851242336, "grad_norm": 1.730519795579836, "learning_rate": 1.7869703298156736e-07, "loss": 0.4316, "step": 4562 }, { "epoch": 0.7362052274927396, "grad_norm": 1.905756151610015, "learning_rate": 1.78492673291811e-07, "loss": 0.4253, "step": 4563 }, { "epoch": 0.7363665698612456, "grad_norm": 1.8187844735250696, "learning_rate": 1.782884051302726e-07, "loss": 0.5457, "step": 4564 }, { "epoch": 0.7365279122297516, "grad_norm": 1.1467198612567961, "learning_rate": 1.7808422855510437e-07, "loss": 0.3742, "step": 4565 }, { "epoch": 0.7366892545982575, "grad_norm": 1.7418764118891026, "learning_rate": 1.7788014362443189e-07, "loss": 0.4029, "step": 4566 }, { "epoch": 0.7368505969667635, "grad_norm": 1.8031926425033997, "learning_rate": 1.7767615039635514e-07, "loss": 0.4857, "step": 4567 }, { "epoch": 0.7370119393352694, "grad_norm": 1.3755311806908392, "learning_rate": 1.7747224892894814e-07, "loss": 0.3474, "step": 4568 }, { "epoch": 0.7371732817037754, "grad_norm": 1.9230486379890799, "learning_rate": 1.7726843928025814e-07, "loss": 0.4389, "step": 4569 }, { "epoch": 0.7373346240722813, "grad_norm": 1.6424391389483923, "learning_rate": 1.7706472150830675e-07, "loss": 0.6003, "step": 4570 }, { "epoch": 0.7374959664407873, "grad_norm": 1.429305228735153, "learning_rate": 1.7686109567108937e-07, "loss": 0.4928, "step": 4571 }, { "epoch": 0.7376573088092934, "grad_norm": 1.4656212942894, "learning_rate": 1.7665756182657516e-07, "loss": 0.5566, "step": 4572 }, { "epoch": 0.7378186511777993, "grad_norm": 1.2748097707916788, "learning_rate": 1.764541200327072e-07, "loss": 0.5256, "step": 4573 }, { "epoch": 0.7379799935463053, "grad_norm": 1.3929058614880963, "learning_rate": 1.7625077034740193e-07, "loss": 0.3495, "step": 4574 }, { "epoch": 0.7381413359148112, "grad_norm": 1.996508710097514, "learning_rate": 1.7604751282855034e-07, "loss": 0.738, "step": 4575 }, { "epoch": 0.7383026782833172, "grad_norm": 1.8782004698253845, "learning_rate": 1.7584434753401627e-07, "loss": 0.5178, "step": 4576 }, { "epoch": 0.7384640206518231, "grad_norm": 1.596460095158648, "learning_rate": 1.7564127452163795e-07, "loss": 0.3644, "step": 4577 }, { "epoch": 0.7386253630203291, "grad_norm": 1.5005154518773314, "learning_rate": 1.7543829384922741e-07, "loss": 0.4325, "step": 4578 }, { "epoch": 0.7387867053888351, "grad_norm": 1.4955275138841633, "learning_rate": 1.7523540557456977e-07, "loss": 0.3702, "step": 4579 }, { "epoch": 0.7389480477573411, "grad_norm": 1.8696025727978667, "learning_rate": 1.7503260975542434e-07, "loss": 0.7401, "step": 4580 }, { "epoch": 0.7391093901258471, "grad_norm": 1.6596974894632943, "learning_rate": 1.748299064495242e-07, "loss": 0.4033, "step": 4581 }, { "epoch": 0.739270732494353, "grad_norm": 1.3231465097443968, "learning_rate": 1.7462729571457558e-07, "loss": 0.4789, "step": 4582 }, { "epoch": 0.739432074862859, "grad_norm": 1.643671386347343, "learning_rate": 1.7442477760825874e-07, "loss": 0.4922, "step": 4583 }, { "epoch": 0.739593417231365, "grad_norm": 1.5186677258973171, "learning_rate": 1.7422235218822757e-07, "loss": 0.3925, "step": 4584 }, { "epoch": 0.7397547595998709, "grad_norm": 2.5507415254312473, "learning_rate": 1.7402001951210943e-07, "loss": 0.6478, "step": 4585 }, { "epoch": 0.7399161019683769, "grad_norm": 1.8414762668949811, "learning_rate": 1.7381777963750555e-07, "loss": 0.58, "step": 4586 }, { "epoch": 0.7400774443368828, "grad_norm": 1.7116693473034712, "learning_rate": 1.7361563262199014e-07, "loss": 0.6442, "step": 4587 }, { "epoch": 0.7402387867053888, "grad_norm": 1.2928706935798677, "learning_rate": 1.7341357852311172e-07, "loss": 0.4186, "step": 4588 }, { "epoch": 0.7404001290738949, "grad_norm": 2.073539205901211, "learning_rate": 1.732116173983917e-07, "loss": 0.5814, "step": 4589 }, { "epoch": 0.7405614714424008, "grad_norm": 1.4595046743414146, "learning_rate": 1.7300974930532542e-07, "loss": 0.5049, "step": 4590 }, { "epoch": 0.7407228138109068, "grad_norm": 1.2093339701156314, "learning_rate": 1.728079743013819e-07, "loss": 0.3645, "step": 4591 }, { "epoch": 0.7408841561794127, "grad_norm": 1.771255427899386, "learning_rate": 1.7260629244400298e-07, "loss": 0.4406, "step": 4592 }, { "epoch": 0.7410454985479187, "grad_norm": 1.674076876412137, "learning_rate": 1.7240470379060461e-07, "loss": 0.4782, "step": 4593 }, { "epoch": 0.7412068409164246, "grad_norm": 1.522063746096127, "learning_rate": 1.72203208398576e-07, "loss": 0.3505, "step": 4594 }, { "epoch": 0.7413681832849306, "grad_norm": 1.90394214945349, "learning_rate": 1.7200180632527983e-07, "loss": 0.5994, "step": 4595 }, { "epoch": 0.7415295256534365, "grad_norm": 2.273479343304525, "learning_rate": 1.7180049762805227e-07, "loss": 0.5564, "step": 4596 }, { "epoch": 0.7416908680219426, "grad_norm": 1.519393169336806, "learning_rate": 1.715992823642025e-07, "loss": 0.3711, "step": 4597 }, { "epoch": 0.7418522103904486, "grad_norm": 1.592376940297763, "learning_rate": 1.713981605910137e-07, "loss": 0.5079, "step": 4598 }, { "epoch": 0.7420135527589545, "grad_norm": 1.0897952380030989, "learning_rate": 1.7119713236574223e-07, "loss": 0.454, "step": 4599 }, { "epoch": 0.7421748951274605, "grad_norm": 1.730882100150022, "learning_rate": 1.7099619774561746e-07, "loss": 0.609, "step": 4600 }, { "epoch": 0.7423362374959664, "grad_norm": 1.4292559363212032, "learning_rate": 1.7079535678784274e-07, "loss": 0.3863, "step": 4601 }, { "epoch": 0.7424975798644724, "grad_norm": 1.319654040950135, "learning_rate": 1.7059460954959408e-07, "loss": 0.5277, "step": 4602 }, { "epoch": 0.7426589222329784, "grad_norm": 1.756557508270495, "learning_rate": 1.7039395608802132e-07, "loss": 0.5022, "step": 4603 }, { "epoch": 0.7428202646014843, "grad_norm": 1.9433299306814635, "learning_rate": 1.7019339646024756e-07, "loss": 0.3908, "step": 4604 }, { "epoch": 0.7429816069699903, "grad_norm": 1.3369545885161753, "learning_rate": 1.6999293072336872e-07, "loss": 0.3048, "step": 4605 }, { "epoch": 0.7431429493384963, "grad_norm": 1.45044329542734, "learning_rate": 1.697925589344546e-07, "loss": 0.5297, "step": 4606 }, { "epoch": 0.7433042917070023, "grad_norm": 1.273426968583633, "learning_rate": 1.6959228115054787e-07, "loss": 0.4123, "step": 4607 }, { "epoch": 0.7434656340755083, "grad_norm": 1.8473042642767497, "learning_rate": 1.6939209742866455e-07, "loss": 0.5314, "step": 4608 }, { "epoch": 0.7436269764440142, "grad_norm": 1.3926049185766127, "learning_rate": 1.6919200782579417e-07, "loss": 0.3696, "step": 4609 }, { "epoch": 0.7437883188125202, "grad_norm": 2.2104896118706896, "learning_rate": 1.689920123988987e-07, "loss": 0.5714, "step": 4610 }, { "epoch": 0.7439496611810261, "grad_norm": 1.2327229997953912, "learning_rate": 1.6879211120491405e-07, "loss": 0.5161, "step": 4611 }, { "epoch": 0.7441110035495321, "grad_norm": 1.6605756880531444, "learning_rate": 1.6859230430074916e-07, "loss": 0.4241, "step": 4612 }, { "epoch": 0.744272345918038, "grad_norm": 1.0775329512228706, "learning_rate": 1.683925917432857e-07, "loss": 0.3737, "step": 4613 }, { "epoch": 0.744433688286544, "grad_norm": 1.596245326884456, "learning_rate": 1.68192973589379e-07, "loss": 0.5317, "step": 4614 }, { "epoch": 0.7445950306550501, "grad_norm": 1.6496128032557469, "learning_rate": 1.6799344989585712e-07, "loss": 0.6738, "step": 4615 }, { "epoch": 0.744756373023556, "grad_norm": 1.6556828805462227, "learning_rate": 1.6779402071952142e-07, "loss": 0.467, "step": 4616 }, { "epoch": 0.744917715392062, "grad_norm": 1.9037842358862735, "learning_rate": 1.6759468611714644e-07, "loss": 0.4279, "step": 4617 }, { "epoch": 0.7450790577605679, "grad_norm": 1.271778875369216, "learning_rate": 1.6739544614547968e-07, "loss": 0.5001, "step": 4618 }, { "epoch": 0.7452404001290739, "grad_norm": 1.3560417788942076, "learning_rate": 1.6719630086124186e-07, "loss": 0.4645, "step": 4619 }, { "epoch": 0.7454017424975798, "grad_norm": 1.624608432836459, "learning_rate": 1.6699725032112626e-07, "loss": 0.4734, "step": 4620 }, { "epoch": 0.7455630848660858, "grad_norm": 1.3078195131904577, "learning_rate": 1.6679829458179966e-07, "loss": 0.4672, "step": 4621 }, { "epoch": 0.7457244272345918, "grad_norm": 2.0593098330570343, "learning_rate": 1.6659943369990198e-07, "loss": 0.5682, "step": 4622 }, { "epoch": 0.7458857696030978, "grad_norm": 1.8886035800329903, "learning_rate": 1.664006677320454e-07, "loss": 0.6026, "step": 4623 }, { "epoch": 0.7460471119716038, "grad_norm": 1.465115127856977, "learning_rate": 1.6620199673481583e-07, "loss": 0.5543, "step": 4624 }, { "epoch": 0.7462084543401097, "grad_norm": 1.3316693303714573, "learning_rate": 1.6600342076477196e-07, "loss": 0.4148, "step": 4625 }, { "epoch": 0.7463697967086157, "grad_norm": 1.3042049772020476, "learning_rate": 1.6580493987844507e-07, "loss": 0.3895, "step": 4626 }, { "epoch": 0.7465311390771217, "grad_norm": 1.3558467810553374, "learning_rate": 1.6560655413233993e-07, "loss": 0.3014, "step": 4627 }, { "epoch": 0.7466924814456276, "grad_norm": 1.6253014798369279, "learning_rate": 1.6540826358293358e-07, "loss": 0.5167, "step": 4628 }, { "epoch": 0.7468538238141336, "grad_norm": 1.9062405570374783, "learning_rate": 1.6521006828667644e-07, "loss": 0.3621, "step": 4629 }, { "epoch": 0.7470151661826395, "grad_norm": 1.852223982331807, "learning_rate": 1.6501196829999176e-07, "loss": 0.4695, "step": 4630 }, { "epoch": 0.7471765085511455, "grad_norm": 1.5113849912166009, "learning_rate": 1.648139636792755e-07, "loss": 0.467, "step": 4631 }, { "epoch": 0.7473378509196515, "grad_norm": 2.3647877833054705, "learning_rate": 1.6461605448089682e-07, "loss": 0.7284, "step": 4632 }, { "epoch": 0.7474991932881575, "grad_norm": 1.687122232044123, "learning_rate": 1.64418240761197e-07, "loss": 0.5695, "step": 4633 }, { "epoch": 0.7476605356566635, "grad_norm": 1.182457205096817, "learning_rate": 1.6422052257649077e-07, "loss": 0.3904, "step": 4634 }, { "epoch": 0.7478218780251694, "grad_norm": 1.0077858263374528, "learning_rate": 1.640228999830657e-07, "loss": 0.3977, "step": 4635 }, { "epoch": 0.7479832203936754, "grad_norm": 1.6641832048221241, "learning_rate": 1.6382537303718147e-07, "loss": 0.4403, "step": 4636 }, { "epoch": 0.7481445627621813, "grad_norm": 1.1427414475786284, "learning_rate": 1.6362794179507122e-07, "loss": 0.4427, "step": 4637 }, { "epoch": 0.7483059051306873, "grad_norm": 1.6675606078610434, "learning_rate": 1.634306063129408e-07, "loss": 0.4478, "step": 4638 }, { "epoch": 0.7484672474991932, "grad_norm": 1.2596686274128055, "learning_rate": 1.6323336664696823e-07, "loss": 0.4138, "step": 4639 }, { "epoch": 0.7486285898676993, "grad_norm": 1.653522621030041, "learning_rate": 1.6303622285330482e-07, "loss": 0.6816, "step": 4640 }, { "epoch": 0.7487899322362053, "grad_norm": 1.2908935950890212, "learning_rate": 1.6283917498807438e-07, "loss": 0.3809, "step": 4641 }, { "epoch": 0.7489512746047112, "grad_norm": 1.8505090219421538, "learning_rate": 1.6264222310737357e-07, "loss": 0.426, "step": 4642 }, { "epoch": 0.7491126169732172, "grad_norm": 1.8269144598568465, "learning_rate": 1.6244536726727127e-07, "loss": 0.4885, "step": 4643 }, { "epoch": 0.7492739593417231, "grad_norm": 1.5627576996963428, "learning_rate": 1.6224860752380948e-07, "loss": 0.3684, "step": 4644 }, { "epoch": 0.7494353017102291, "grad_norm": 1.8613988936941814, "learning_rate": 1.620519439330028e-07, "loss": 0.6036, "step": 4645 }, { "epoch": 0.749596644078735, "grad_norm": 1.8596600866949489, "learning_rate": 1.6185537655083813e-07, "loss": 0.3924, "step": 4646 }, { "epoch": 0.749757986447241, "grad_norm": 1.5566897127268473, "learning_rate": 1.6165890543327525e-07, "loss": 0.3908, "step": 4647 }, { "epoch": 0.749919328815747, "grad_norm": 1.7165710509007563, "learning_rate": 1.6146253063624677e-07, "loss": 0.4595, "step": 4648 }, { "epoch": 0.750080671184253, "grad_norm": 1.6262898094705682, "learning_rate": 1.6126625221565715e-07, "loss": 0.4103, "step": 4649 }, { "epoch": 0.750242013552759, "grad_norm": 1.3778673532338317, "learning_rate": 1.6107007022738405e-07, "loss": 0.4216, "step": 4650 }, { "epoch": 0.750403355921265, "grad_norm": 1.6644432098014563, "learning_rate": 1.6087398472727764e-07, "loss": 0.3864, "step": 4651 }, { "epoch": 0.7505646982897709, "grad_norm": 2.030729491227359, "learning_rate": 1.6067799577116004e-07, "loss": 0.5252, "step": 4652 }, { "epoch": 0.7507260406582769, "grad_norm": 1.4887497532457585, "learning_rate": 1.6048210341482687e-07, "loss": 0.6138, "step": 4653 }, { "epoch": 0.7508873830267828, "grad_norm": 3.1625142394940204, "learning_rate": 1.602863077140452e-07, "loss": 0.4671, "step": 4654 }, { "epoch": 0.7510487253952888, "grad_norm": 1.5903646379688088, "learning_rate": 1.6009060872455554e-07, "loss": 0.4291, "step": 4655 }, { "epoch": 0.7512100677637947, "grad_norm": 1.848936061098159, "learning_rate": 1.598950065020699e-07, "loss": 0.516, "step": 4656 }, { "epoch": 0.7513714101323007, "grad_norm": 2.658266536228189, "learning_rate": 1.5969950110227348e-07, "loss": 0.6431, "step": 4657 }, { "epoch": 0.7515327525008068, "grad_norm": 1.6948558702208703, "learning_rate": 1.5950409258082382e-07, "loss": 0.4608, "step": 4658 }, { "epoch": 0.7516940948693127, "grad_norm": 2.1062584353314837, "learning_rate": 1.5930878099335038e-07, "loss": 0.5887, "step": 4659 }, { "epoch": 0.7518554372378187, "grad_norm": 1.3283178750402667, "learning_rate": 1.5911356639545564e-07, "loss": 0.3756, "step": 4660 }, { "epoch": 0.7520167796063246, "grad_norm": 1.6745832380244308, "learning_rate": 1.5891844884271427e-07, "loss": 0.5897, "step": 4661 }, { "epoch": 0.7521781219748306, "grad_norm": 1.8796551455077966, "learning_rate": 1.5872342839067304e-07, "loss": 0.5417, "step": 4662 }, { "epoch": 0.7523394643433365, "grad_norm": 2.2663104894763855, "learning_rate": 1.5852850509485134e-07, "loss": 0.4409, "step": 4663 }, { "epoch": 0.7525008067118425, "grad_norm": 1.7680697774236018, "learning_rate": 1.583336790107409e-07, "loss": 0.5513, "step": 4664 }, { "epoch": 0.7526621490803485, "grad_norm": 1.6468535130526878, "learning_rate": 1.5813895019380573e-07, "loss": 0.3716, "step": 4665 }, { "epoch": 0.7528234914488545, "grad_norm": 1.955908782743294, "learning_rate": 1.5794431869948226e-07, "loss": 0.6001, "step": 4666 }, { "epoch": 0.7529848338173605, "grad_norm": 1.4363205258788558, "learning_rate": 1.5774978458317888e-07, "loss": 0.414, "step": 4667 }, { "epoch": 0.7531461761858664, "grad_norm": 2.043500841917607, "learning_rate": 1.575553479002767e-07, "loss": 0.4218, "step": 4668 }, { "epoch": 0.7533075185543724, "grad_norm": 1.9479278999317504, "learning_rate": 1.5736100870612857e-07, "loss": 0.5084, "step": 4669 }, { "epoch": 0.7534688609228783, "grad_norm": 1.383302806417839, "learning_rate": 1.5716676705606008e-07, "loss": 0.4657, "step": 4670 }, { "epoch": 0.7536302032913843, "grad_norm": 1.1555589724504458, "learning_rate": 1.5697262300536902e-07, "loss": 0.211, "step": 4671 }, { "epoch": 0.7537915456598903, "grad_norm": 2.167729325004198, "learning_rate": 1.5677857660932485e-07, "loss": 0.6038, "step": 4672 }, { "epoch": 0.7539528880283962, "grad_norm": 1.1624294321114623, "learning_rate": 1.5658462792316985e-07, "loss": 0.3022, "step": 4673 }, { "epoch": 0.7541142303969022, "grad_norm": 2.04756304409795, "learning_rate": 1.5639077700211832e-07, "loss": 0.5976, "step": 4674 }, { "epoch": 0.7542755727654082, "grad_norm": 1.4306981417922708, "learning_rate": 1.561970239013562e-07, "loss": 0.432, "step": 4675 }, { "epoch": 0.7544369151339142, "grad_norm": 2.0466839650238007, "learning_rate": 1.5600336867604273e-07, "loss": 0.6291, "step": 4676 }, { "epoch": 0.7545982575024202, "grad_norm": 1.2254915360649112, "learning_rate": 1.5580981138130805e-07, "loss": 0.4128, "step": 4677 }, { "epoch": 0.7547595998709261, "grad_norm": 1.79495003463357, "learning_rate": 1.5561635207225515e-07, "loss": 0.517, "step": 4678 }, { "epoch": 0.7549209422394321, "grad_norm": 1.2905394906819327, "learning_rate": 1.554229908039591e-07, "loss": 0.468, "step": 4679 }, { "epoch": 0.755082284607938, "grad_norm": 1.7625380681032288, "learning_rate": 1.552297276314665e-07, "loss": 0.4811, "step": 4680 }, { "epoch": 0.755243626976444, "grad_norm": 1.3386712463039039, "learning_rate": 1.5503656260979687e-07, "loss": 0.3962, "step": 4681 }, { "epoch": 0.7554049693449499, "grad_norm": 2.002587075691726, "learning_rate": 1.548434957939409e-07, "loss": 0.5073, "step": 4682 }, { "epoch": 0.755566311713456, "grad_norm": 1.1678292181560328, "learning_rate": 1.5465052723886202e-07, "loss": 0.3806, "step": 4683 }, { "epoch": 0.755727654081962, "grad_norm": 1.418066688707275, "learning_rate": 1.5445765699949548e-07, "loss": 0.5083, "step": 4684 }, { "epoch": 0.7558889964504679, "grad_norm": 1.5931908617738788, "learning_rate": 1.5426488513074826e-07, "loss": 0.5096, "step": 4685 }, { "epoch": 0.7560503388189739, "grad_norm": 1.5462993745314229, "learning_rate": 1.5407221168749967e-07, "loss": 0.3886, "step": 4686 }, { "epoch": 0.7562116811874798, "grad_norm": 1.459124332440571, "learning_rate": 1.5387963672460085e-07, "loss": 0.5497, "step": 4687 }, { "epoch": 0.7563730235559858, "grad_norm": 2.1376360888966737, "learning_rate": 1.5368716029687506e-07, "loss": 0.5906, "step": 4688 }, { "epoch": 0.7565343659244917, "grad_norm": 2.1221266000430106, "learning_rate": 1.5349478245911752e-07, "loss": 0.563, "step": 4689 }, { "epoch": 0.7566957082929977, "grad_norm": 1.555687051089561, "learning_rate": 1.5330250326609485e-07, "loss": 0.4492, "step": 4690 }, { "epoch": 0.7568570506615037, "grad_norm": 1.7760815508737215, "learning_rate": 1.531103227725462e-07, "loss": 0.4696, "step": 4691 }, { "epoch": 0.7570183930300097, "grad_norm": 1.3989760997304002, "learning_rate": 1.529182410331827e-07, "loss": 0.3475, "step": 4692 }, { "epoch": 0.7571797353985157, "grad_norm": 1.8359033459672223, "learning_rate": 1.5272625810268652e-07, "loss": 0.5278, "step": 4693 }, { "epoch": 0.7573410777670216, "grad_norm": 1.7991836975102, "learning_rate": 1.5253437403571278e-07, "loss": 0.5352, "step": 4694 }, { "epoch": 0.7575024201355276, "grad_norm": 1.4037542448718463, "learning_rate": 1.523425888868876e-07, "loss": 0.3059, "step": 4695 }, { "epoch": 0.7576637625040336, "grad_norm": 1.6331988155998813, "learning_rate": 1.521509027108094e-07, "loss": 0.4664, "step": 4696 }, { "epoch": 0.7578251048725395, "grad_norm": 1.8179028324099467, "learning_rate": 1.519593155620484e-07, "loss": 0.4692, "step": 4697 }, { "epoch": 0.7579864472410455, "grad_norm": 1.5773749504763472, "learning_rate": 1.517678274951461e-07, "loss": 0.5051, "step": 4698 }, { "epoch": 0.7581477896095514, "grad_norm": 2.035942526142887, "learning_rate": 1.5157643856461695e-07, "loss": 0.7767, "step": 4699 }, { "epoch": 0.7583091319780574, "grad_norm": 1.1849479859441112, "learning_rate": 1.5138514882494586e-07, "loss": 0.5143, "step": 4700 }, { "epoch": 0.7584704743465635, "grad_norm": 1.1830387639831554, "learning_rate": 1.511939583305903e-07, "loss": 0.3715, "step": 4701 }, { "epoch": 0.7586318167150694, "grad_norm": 1.7890726947077507, "learning_rate": 1.5100286713597937e-07, "loss": 0.5514, "step": 4702 }, { "epoch": 0.7587931590835754, "grad_norm": 1.610582361748641, "learning_rate": 1.5081187529551358e-07, "loss": 0.5675, "step": 4703 }, { "epoch": 0.7589545014520813, "grad_norm": 1.7563927236103523, "learning_rate": 1.506209828635655e-07, "loss": 0.389, "step": 4704 }, { "epoch": 0.7591158438205873, "grad_norm": 1.4332304865880947, "learning_rate": 1.5043018989447947e-07, "loss": 0.5083, "step": 4705 }, { "epoch": 0.7592771861890932, "grad_norm": 1.8420404817189833, "learning_rate": 1.5023949644257106e-07, "loss": 0.496, "step": 4706 }, { "epoch": 0.7594385285575992, "grad_norm": 1.655928166628262, "learning_rate": 1.5004890256212792e-07, "loss": 0.5234, "step": 4707 }, { "epoch": 0.7595998709261051, "grad_norm": 1.798207062939314, "learning_rate": 1.4985840830740908e-07, "loss": 0.5992, "step": 4708 }, { "epoch": 0.7597612132946112, "grad_norm": 2.183012255387411, "learning_rate": 1.4966801373264542e-07, "loss": 0.5594, "step": 4709 }, { "epoch": 0.7599225556631172, "grad_norm": 1.7118197739917453, "learning_rate": 1.4947771889203937e-07, "loss": 0.4972, "step": 4710 }, { "epoch": 0.7600838980316231, "grad_norm": 1.4223983975078538, "learning_rate": 1.4928752383976495e-07, "loss": 0.5317, "step": 4711 }, { "epoch": 0.7602452404001291, "grad_norm": 1.4158727534335158, "learning_rate": 1.49097428629968e-07, "loss": 0.3241, "step": 4712 }, { "epoch": 0.760406582768635, "grad_norm": 1.4752542664505472, "learning_rate": 1.4890743331676532e-07, "loss": 0.511, "step": 4713 }, { "epoch": 0.760567925137141, "grad_norm": 1.844600739840347, "learning_rate": 1.4871753795424586e-07, "loss": 0.4443, "step": 4714 }, { "epoch": 0.760729267505647, "grad_norm": 1.5184903931576053, "learning_rate": 1.4852774259647005e-07, "loss": 0.4498, "step": 4715 }, { "epoch": 0.7608906098741529, "grad_norm": 1.3431032531738647, "learning_rate": 1.4833804729746953e-07, "loss": 0.424, "step": 4716 }, { "epoch": 0.7610519522426589, "grad_norm": 1.3957714475131422, "learning_rate": 1.4814845211124765e-07, "loss": 0.4745, "step": 4717 }, { "epoch": 0.7612132946111649, "grad_norm": 1.3241174259637185, "learning_rate": 1.4795895709177952e-07, "loss": 0.4855, "step": 4718 }, { "epoch": 0.7613746369796709, "grad_norm": 1.4544108454504496, "learning_rate": 1.4776956229301112e-07, "loss": 0.5657, "step": 4719 }, { "epoch": 0.7615359793481769, "grad_norm": 1.8192534415215256, "learning_rate": 1.4758026776886056e-07, "loss": 0.5228, "step": 4720 }, { "epoch": 0.7616973217166828, "grad_norm": 2.3293581775234906, "learning_rate": 1.4739107357321668e-07, "loss": 0.569, "step": 4721 }, { "epoch": 0.7618586640851888, "grad_norm": 1.642728347349998, "learning_rate": 1.472019797599407e-07, "loss": 0.5025, "step": 4722 }, { "epoch": 0.7620200064536947, "grad_norm": 1.9607906210554815, "learning_rate": 1.4701298638286437e-07, "loss": 0.5636, "step": 4723 }, { "epoch": 0.7621813488222007, "grad_norm": 1.5024955176922907, "learning_rate": 1.4682409349579134e-07, "loss": 0.4504, "step": 4724 }, { "epoch": 0.7623426911907066, "grad_norm": 1.5881445049482783, "learning_rate": 1.466353011524966e-07, "loss": 0.3105, "step": 4725 }, { "epoch": 0.7625040335592127, "grad_norm": 1.6106365407427377, "learning_rate": 1.4644660940672627e-07, "loss": 0.4568, "step": 4726 }, { "epoch": 0.7626653759277187, "grad_norm": 1.8761932390892078, "learning_rate": 1.4625801831219802e-07, "loss": 0.5543, "step": 4727 }, { "epoch": 0.7628267182962246, "grad_norm": 1.365551083657192, "learning_rate": 1.4606952792260114e-07, "loss": 0.386, "step": 4728 }, { "epoch": 0.7629880606647306, "grad_norm": 1.4001878833719996, "learning_rate": 1.4588113829159554e-07, "loss": 0.5334, "step": 4729 }, { "epoch": 0.7631494030332365, "grad_norm": 1.7580869333300542, "learning_rate": 1.4569284947281306e-07, "loss": 0.4315, "step": 4730 }, { "epoch": 0.7633107454017425, "grad_norm": 1.581059965392287, "learning_rate": 1.4550466151985686e-07, "loss": 0.564, "step": 4731 }, { "epoch": 0.7634720877702484, "grad_norm": 1.6700746343690132, "learning_rate": 1.453165744863006e-07, "loss": 0.498, "step": 4732 }, { "epoch": 0.7636334301387544, "grad_norm": 1.6258478887008443, "learning_rate": 1.4512858842569048e-07, "loss": 0.3735, "step": 4733 }, { "epoch": 0.7637947725072604, "grad_norm": 1.7390994771503328, "learning_rate": 1.4494070339154274e-07, "loss": 0.4996, "step": 4734 }, { "epoch": 0.7639561148757664, "grad_norm": 1.8777385421970154, "learning_rate": 1.4475291943734574e-07, "loss": 0.5295, "step": 4735 }, { "epoch": 0.7641174572442724, "grad_norm": 1.1998712709044848, "learning_rate": 1.4456523661655833e-07, "loss": 0.2972, "step": 4736 }, { "epoch": 0.7642787996127783, "grad_norm": 2.2246825731139586, "learning_rate": 1.4437765498261112e-07, "loss": 0.4475, "step": 4737 }, { "epoch": 0.7644401419812843, "grad_norm": 1.7882816659669405, "learning_rate": 1.4419017458890592e-07, "loss": 0.4969, "step": 4738 }, { "epoch": 0.7646014843497903, "grad_norm": 1.999321941829604, "learning_rate": 1.4400279548881516e-07, "loss": 0.5833, "step": 4739 }, { "epoch": 0.7647628267182962, "grad_norm": 1.6004563396091496, "learning_rate": 1.4381551773568302e-07, "loss": 0.516, "step": 4740 }, { "epoch": 0.7649241690868022, "grad_norm": 1.6104497931635928, "learning_rate": 1.436283413828247e-07, "loss": 0.3338, "step": 4741 }, { "epoch": 0.7650855114553081, "grad_norm": 1.6335943804921667, "learning_rate": 1.4344126648352617e-07, "loss": 0.5608, "step": 4742 }, { "epoch": 0.7652468538238142, "grad_norm": 1.4626023705021842, "learning_rate": 1.4325429309104498e-07, "loss": 0.4631, "step": 4743 }, { "epoch": 0.7654081961923201, "grad_norm": 1.643524876832478, "learning_rate": 1.430674212586095e-07, "loss": 0.5523, "step": 4744 }, { "epoch": 0.7655695385608261, "grad_norm": 1.5316611535959297, "learning_rate": 1.4288065103941938e-07, "loss": 0.5309, "step": 4745 }, { "epoch": 0.7657308809293321, "grad_norm": 1.5195501566595708, "learning_rate": 1.4269398248664537e-07, "loss": 0.523, "step": 4746 }, { "epoch": 0.765892223297838, "grad_norm": 1.6669394110988731, "learning_rate": 1.4250741565342884e-07, "loss": 0.3729, "step": 4747 }, { "epoch": 0.766053565666344, "grad_norm": 1.1803246602670994, "learning_rate": 1.423209505928828e-07, "loss": 0.5403, "step": 4748 }, { "epoch": 0.7662149080348499, "grad_norm": 1.6262606352591011, "learning_rate": 1.421345873580907e-07, "loss": 0.6136, "step": 4749 }, { "epoch": 0.7663762504033559, "grad_norm": 1.1083479949631536, "learning_rate": 1.4194832600210748e-07, "loss": 0.4816, "step": 4750 }, { "epoch": 0.7665375927718618, "grad_norm": 2.051487392378429, "learning_rate": 1.4176216657795902e-07, "loss": 0.6793, "step": 4751 }, { "epoch": 0.7666989351403679, "grad_norm": 2.096216371797668, "learning_rate": 1.4157610913864182e-07, "loss": 0.6987, "step": 4752 }, { "epoch": 0.7668602775088739, "grad_norm": 1.194872307598881, "learning_rate": 1.4139015373712364e-07, "loss": 0.3017, "step": 4753 }, { "epoch": 0.7670216198773798, "grad_norm": 1.390333466563603, "learning_rate": 1.412043004263434e-07, "loss": 0.3326, "step": 4754 }, { "epoch": 0.7671829622458858, "grad_norm": 1.7323031723487565, "learning_rate": 1.4101854925921014e-07, "loss": 0.549, "step": 4755 }, { "epoch": 0.7673443046143917, "grad_norm": 1.8870685008381303, "learning_rate": 1.40832900288605e-07, "loss": 0.5417, "step": 4756 }, { "epoch": 0.7675056469828977, "grad_norm": 1.2293896984623813, "learning_rate": 1.40647353567379e-07, "loss": 0.3924, "step": 4757 }, { "epoch": 0.7676669893514037, "grad_norm": 1.3113583640531554, "learning_rate": 1.4046190914835458e-07, "loss": 0.418, "step": 4758 }, { "epoch": 0.7678283317199096, "grad_norm": 1.8003052187803663, "learning_rate": 1.4027656708432507e-07, "loss": 0.4939, "step": 4759 }, { "epoch": 0.7679896740884156, "grad_norm": 1.4218930253783182, "learning_rate": 1.4009132742805424e-07, "loss": 0.4579, "step": 4760 }, { "epoch": 0.7681510164569216, "grad_norm": 1.3820696629716533, "learning_rate": 1.3990619023227722e-07, "loss": 0.4669, "step": 4761 }, { "epoch": 0.7683123588254276, "grad_norm": 1.5292619528260447, "learning_rate": 1.3972115554969954e-07, "loss": 0.4401, "step": 4762 }, { "epoch": 0.7684737011939335, "grad_norm": 1.6988799360264046, "learning_rate": 1.3953622343299787e-07, "loss": 0.4551, "step": 4763 }, { "epoch": 0.7686350435624395, "grad_norm": 1.5480910636376328, "learning_rate": 1.3935139393481964e-07, "loss": 0.5097, "step": 4764 }, { "epoch": 0.7687963859309455, "grad_norm": 1.8142154073877435, "learning_rate": 1.391666671077827e-07, "loss": 0.647, "step": 4765 }, { "epoch": 0.7689577282994514, "grad_norm": 1.4171113083113982, "learning_rate": 1.3898204300447614e-07, "loss": 0.4054, "step": 4766 }, { "epoch": 0.7691190706679574, "grad_norm": 1.8055236332671625, "learning_rate": 1.387975216774596e-07, "loss": 0.4837, "step": 4767 }, { "epoch": 0.7692804130364633, "grad_norm": 1.5652565863384986, "learning_rate": 1.3861310317926351e-07, "loss": 0.413, "step": 4768 }, { "epoch": 0.7694417554049694, "grad_norm": 1.7201250991128234, "learning_rate": 1.3842878756238908e-07, "loss": 0.5079, "step": 4769 }, { "epoch": 0.7696030977734754, "grad_norm": 2.0403403301213565, "learning_rate": 1.3824457487930786e-07, "loss": 0.5157, "step": 4770 }, { "epoch": 0.7697644401419813, "grad_norm": 1.3184386278985356, "learning_rate": 1.3806046518246262e-07, "loss": 0.4122, "step": 4771 }, { "epoch": 0.7699257825104873, "grad_norm": 2.3729084241376155, "learning_rate": 1.378764585242666e-07, "loss": 0.5261, "step": 4772 }, { "epoch": 0.7700871248789932, "grad_norm": 1.4223601802500097, "learning_rate": 1.3769255495710346e-07, "loss": 0.4815, "step": 4773 }, { "epoch": 0.7702484672474992, "grad_norm": 1.4745788975275516, "learning_rate": 1.37508754533328e-07, "loss": 0.4143, "step": 4774 }, { "epoch": 0.7704098096160051, "grad_norm": 1.335509069799705, "learning_rate": 1.373250573052651e-07, "loss": 0.4405, "step": 4775 }, { "epoch": 0.7705711519845111, "grad_norm": 1.6953653775595792, "learning_rate": 1.3714146332521077e-07, "loss": 0.5248, "step": 4776 }, { "epoch": 0.770732494353017, "grad_norm": 1.7596970014496844, "learning_rate": 1.369579726454314e-07, "loss": 0.5394, "step": 4777 }, { "epoch": 0.7708938367215231, "grad_norm": 1.34219261230632, "learning_rate": 1.3677458531816366e-07, "loss": 0.3947, "step": 4778 }, { "epoch": 0.7710551790900291, "grad_norm": 1.4829691604165138, "learning_rate": 1.3659130139561565e-07, "loss": 0.3819, "step": 4779 }, { "epoch": 0.771216521458535, "grad_norm": 1.2544411917472442, "learning_rate": 1.3640812092996512e-07, "loss": 0.3816, "step": 4780 }, { "epoch": 0.771377863827041, "grad_norm": 1.7745586330070868, "learning_rate": 1.3622504397336083e-07, "loss": 0.5671, "step": 4781 }, { "epoch": 0.771539206195547, "grad_norm": 1.4920597521038717, "learning_rate": 1.3604207057792217e-07, "loss": 0.53, "step": 4782 }, { "epoch": 0.7717005485640529, "grad_norm": 1.4019515995454181, "learning_rate": 1.3585920079573855e-07, "loss": 0.391, "step": 4783 }, { "epoch": 0.7718618909325589, "grad_norm": 1.5414787727846921, "learning_rate": 1.3567643467887035e-07, "loss": 0.347, "step": 4784 }, { "epoch": 0.7720232333010648, "grad_norm": 2.301409960921248, "learning_rate": 1.3549377227934845e-07, "loss": 0.6838, "step": 4785 }, { "epoch": 0.7721845756695709, "grad_norm": 2.221456998626538, "learning_rate": 1.353112136491737e-07, "loss": 0.5942, "step": 4786 }, { "epoch": 0.7723459180380768, "grad_norm": 1.5313164054329118, "learning_rate": 1.3512875884031816e-07, "loss": 0.5149, "step": 4787 }, { "epoch": 0.7725072604065828, "grad_norm": 1.4097244573783172, "learning_rate": 1.349464079047235e-07, "loss": 0.3945, "step": 4788 }, { "epoch": 0.7726686027750888, "grad_norm": 1.3672315753916693, "learning_rate": 1.3476416089430241e-07, "loss": 0.4372, "step": 4789 }, { "epoch": 0.7728299451435947, "grad_norm": 1.9658691885301915, "learning_rate": 1.3458201786093794e-07, "loss": 0.4758, "step": 4790 }, { "epoch": 0.7729912875121007, "grad_norm": 1.603532426307001, "learning_rate": 1.343999788564833e-07, "loss": 0.6407, "step": 4791 }, { "epoch": 0.7731526298806066, "grad_norm": 1.976562198442881, "learning_rate": 1.3421804393276244e-07, "loss": 0.6708, "step": 4792 }, { "epoch": 0.7733139722491126, "grad_norm": 1.58897372347578, "learning_rate": 1.3403621314156904e-07, "loss": 0.3868, "step": 4793 }, { "epoch": 0.7734753146176185, "grad_norm": 1.2692249104557998, "learning_rate": 1.338544865346678e-07, "loss": 0.4838, "step": 4794 }, { "epoch": 0.7736366569861246, "grad_norm": 1.581519602231677, "learning_rate": 1.3367286416379364e-07, "loss": 0.4133, "step": 4795 }, { "epoch": 0.7737979993546306, "grad_norm": 1.640073774469464, "learning_rate": 1.3349134608065138e-07, "loss": 0.5414, "step": 4796 }, { "epoch": 0.7739593417231365, "grad_norm": 1.263070814054833, "learning_rate": 1.333099323369165e-07, "loss": 0.4217, "step": 4797 }, { "epoch": 0.7741206840916425, "grad_norm": 1.2471541434051845, "learning_rate": 1.3312862298423495e-07, "loss": 0.4861, "step": 4798 }, { "epoch": 0.7742820264601484, "grad_norm": 1.6502561254828305, "learning_rate": 1.3294741807422244e-07, "loss": 0.5571, "step": 4799 }, { "epoch": 0.7744433688286544, "grad_norm": 1.2527826807208566, "learning_rate": 1.3276631765846546e-07, "loss": 0.4911, "step": 4800 }, { "epoch": 0.7746047111971603, "grad_norm": 1.6029116274770416, "learning_rate": 1.3258532178852017e-07, "loss": 0.3163, "step": 4801 }, { "epoch": 0.7747660535656663, "grad_norm": 1.4435960629788707, "learning_rate": 1.3240443051591387e-07, "loss": 0.4493, "step": 4802 }, { "epoch": 0.7749273959341723, "grad_norm": 1.3313413830406973, "learning_rate": 1.3222364389214312e-07, "loss": 0.3457, "step": 4803 }, { "epoch": 0.7750887383026783, "grad_norm": 1.7647157876821287, "learning_rate": 1.320429619686752e-07, "loss": 0.6735, "step": 4804 }, { "epoch": 0.7752500806711843, "grad_norm": 1.7607681179411674, "learning_rate": 1.3186238479694771e-07, "loss": 0.3991, "step": 4805 }, { "epoch": 0.7754114230396902, "grad_norm": 2.132321926611002, "learning_rate": 1.3168191242836785e-07, "loss": 0.6104, "step": 4806 }, { "epoch": 0.7755727654081962, "grad_norm": 1.8165056755172557, "learning_rate": 1.3150154491431354e-07, "loss": 0.5819, "step": 4807 }, { "epoch": 0.7757341077767022, "grad_norm": 1.6460660536526688, "learning_rate": 1.3132128230613277e-07, "loss": 0.5309, "step": 4808 }, { "epoch": 0.7758954501452081, "grad_norm": 1.5444603772031473, "learning_rate": 1.3114112465514327e-07, "loss": 0.5428, "step": 4809 }, { "epoch": 0.7760567925137141, "grad_norm": 2.022612060912867, "learning_rate": 1.3096107201263328e-07, "loss": 0.5248, "step": 4810 }, { "epoch": 0.77621813488222, "grad_norm": 1.8421724974043205, "learning_rate": 1.307811244298612e-07, "loss": 0.4723, "step": 4811 }, { "epoch": 0.7763794772507261, "grad_norm": 1.9046795739250717, "learning_rate": 1.306012819580551e-07, "loss": 0.5195, "step": 4812 }, { "epoch": 0.776540819619232, "grad_norm": 1.4668280017866995, "learning_rate": 1.3042154464841342e-07, "loss": 0.2653, "step": 4813 }, { "epoch": 0.776702161987738, "grad_norm": 1.9112403461219971, "learning_rate": 1.3024191255210476e-07, "loss": 0.4768, "step": 4814 }, { "epoch": 0.776863504356244, "grad_norm": 1.6640612105243684, "learning_rate": 1.3006238572026773e-07, "loss": 0.4522, "step": 4815 }, { "epoch": 0.7770248467247499, "grad_norm": 1.8484376057667513, "learning_rate": 1.298829642040105e-07, "loss": 0.5653, "step": 4816 }, { "epoch": 0.7771861890932559, "grad_norm": 1.4535679295967383, "learning_rate": 1.2970364805441186e-07, "loss": 0.4902, "step": 4817 }, { "epoch": 0.7773475314617618, "grad_norm": 1.7249518429212274, "learning_rate": 1.2952443732252054e-07, "loss": 0.5203, "step": 4818 }, { "epoch": 0.7775088738302678, "grad_norm": 1.4289832543365637, "learning_rate": 1.293453320593547e-07, "loss": 0.4869, "step": 4819 }, { "epoch": 0.7776702161987737, "grad_norm": 1.487162813068718, "learning_rate": 1.2916633231590313e-07, "loss": 0.4902, "step": 4820 }, { "epoch": 0.7778315585672798, "grad_norm": 1.9214412540902523, "learning_rate": 1.2898743814312442e-07, "loss": 0.6102, "step": 4821 }, { "epoch": 0.7779929009357858, "grad_norm": 1.040323395882564, "learning_rate": 1.2880864959194664e-07, "loss": 0.4781, "step": 4822 }, { "epoch": 0.7781542433042917, "grad_norm": 1.366172282808463, "learning_rate": 1.2862996671326844e-07, "loss": 0.5017, "step": 4823 }, { "epoch": 0.7783155856727977, "grad_norm": 2.2263806720575934, "learning_rate": 1.28451389557958e-07, "loss": 0.7395, "step": 4824 }, { "epoch": 0.7784769280413036, "grad_norm": 1.3599648346982747, "learning_rate": 1.2827291817685348e-07, "loss": 0.3518, "step": 4825 }, { "epoch": 0.7786382704098096, "grad_norm": 1.28414501535002, "learning_rate": 1.2809455262076324e-07, "loss": 0.3566, "step": 4826 }, { "epoch": 0.7787996127783156, "grad_norm": 1.5855424511103673, "learning_rate": 1.279162929404648e-07, "loss": 0.5263, "step": 4827 }, { "epoch": 0.7789609551468215, "grad_norm": 1.3655134142367291, "learning_rate": 1.277381391867063e-07, "loss": 0.4926, "step": 4828 }, { "epoch": 0.7791222975153276, "grad_norm": 1.2953510292498234, "learning_rate": 1.2756009141020508e-07, "loss": 0.5248, "step": 4829 }, { "epoch": 0.7792836398838335, "grad_norm": 1.6294384276844793, "learning_rate": 1.2738214966164878e-07, "loss": 0.6143, "step": 4830 }, { "epoch": 0.7794449822523395, "grad_norm": 1.5932813217074782, "learning_rate": 1.272043139916949e-07, "loss": 0.5454, "step": 4831 }, { "epoch": 0.7796063246208454, "grad_norm": 1.2617474720995518, "learning_rate": 1.2702658445097015e-07, "loss": 0.3677, "step": 4832 }, { "epoch": 0.7797676669893514, "grad_norm": 1.5242190972527716, "learning_rate": 1.268489610900716e-07, "loss": 0.542, "step": 4833 }, { "epoch": 0.7799290093578574, "grad_norm": 1.6455047697950356, "learning_rate": 1.2667144395956601e-07, "loss": 0.4937, "step": 4834 }, { "epoch": 0.7800903517263633, "grad_norm": 1.4096374402706162, "learning_rate": 1.264940331099895e-07, "loss": 0.4547, "step": 4835 }, { "epoch": 0.7802516940948693, "grad_norm": 1.7868548769650037, "learning_rate": 1.263167285918485e-07, "loss": 0.4736, "step": 4836 }, { "epoch": 0.7804130364633752, "grad_norm": 1.1714264074664449, "learning_rate": 1.2613953045561876e-07, "loss": 0.3594, "step": 4837 }, { "epoch": 0.7805743788318813, "grad_norm": 1.76702292137197, "learning_rate": 1.259624387517459e-07, "loss": 0.4169, "step": 4838 }, { "epoch": 0.7807357212003873, "grad_norm": 1.1230466627037843, "learning_rate": 1.257854535306454e-07, "loss": 0.3485, "step": 4839 }, { "epoch": 0.7808970635688932, "grad_norm": 1.3051424889219818, "learning_rate": 1.2560857484270192e-07, "loss": 0.4123, "step": 4840 }, { "epoch": 0.7810584059373992, "grad_norm": 1.9639428198623097, "learning_rate": 1.2543180273827043e-07, "loss": 0.5228, "step": 4841 }, { "epoch": 0.7812197483059051, "grad_norm": 1.8017055431527358, "learning_rate": 1.2525513726767494e-07, "loss": 0.4592, "step": 4842 }, { "epoch": 0.7813810906744111, "grad_norm": 1.3607004215362337, "learning_rate": 1.2507857848120956e-07, "loss": 0.4902, "step": 4843 }, { "epoch": 0.781542433042917, "grad_norm": 1.5401740538094493, "learning_rate": 1.2490212642913804e-07, "loss": 0.3915, "step": 4844 }, { "epoch": 0.781703775411423, "grad_norm": 1.3709916731611456, "learning_rate": 1.2472578116169324e-07, "loss": 0.4969, "step": 4845 }, { "epoch": 0.7818651177799291, "grad_norm": 1.1239471277073725, "learning_rate": 1.2454954272907815e-07, "loss": 0.4437, "step": 4846 }, { "epoch": 0.782026460148435, "grad_norm": 2.002389910430422, "learning_rate": 1.2437341118146512e-07, "loss": 0.4492, "step": 4847 }, { "epoch": 0.782187802516941, "grad_norm": 1.3232364218047818, "learning_rate": 1.2419738656899614e-07, "loss": 0.4946, "step": 4848 }, { "epoch": 0.7823491448854469, "grad_norm": 1.6016454907756423, "learning_rate": 1.240214689417829e-07, "loss": 0.4808, "step": 4849 }, { "epoch": 0.7825104872539529, "grad_norm": 1.8921461685164855, "learning_rate": 1.2384565834990607e-07, "loss": 0.4252, "step": 4850 }, { "epoch": 0.7826718296224588, "grad_norm": 1.248248542169002, "learning_rate": 1.2366995484341647e-07, "loss": 0.3609, "step": 4851 }, { "epoch": 0.7828331719909648, "grad_norm": 1.7457981755518397, "learning_rate": 1.2349435847233425e-07, "loss": 0.4112, "step": 4852 }, { "epoch": 0.7829945143594708, "grad_norm": 2.029789326075973, "learning_rate": 1.2331886928664886e-07, "loss": 0.5087, "step": 4853 }, { "epoch": 0.7831558567279767, "grad_norm": 1.113098932696749, "learning_rate": 1.2314348733631957e-07, "loss": 0.4521, "step": 4854 }, { "epoch": 0.7833171990964828, "grad_norm": 1.6688083795629698, "learning_rate": 1.2296821267127466e-07, "loss": 0.5587, "step": 4855 }, { "epoch": 0.7834785414649887, "grad_norm": 1.507623690311795, "learning_rate": 1.2279304534141234e-07, "loss": 0.2681, "step": 4856 }, { "epoch": 0.7836398838334947, "grad_norm": 1.487125378405608, "learning_rate": 1.2261798539660011e-07, "loss": 0.4674, "step": 4857 }, { "epoch": 0.7838012262020007, "grad_norm": 1.6524373923025757, "learning_rate": 1.2244303288667474e-07, "loss": 0.4796, "step": 4858 }, { "epoch": 0.7839625685705066, "grad_norm": 1.145266820561316, "learning_rate": 1.2226818786144255e-07, "loss": 0.4864, "step": 4859 }, { "epoch": 0.7841239109390126, "grad_norm": 1.1449492540733306, "learning_rate": 1.2209345037067924e-07, "loss": 0.3562, "step": 4860 }, { "epoch": 0.7842852533075185, "grad_norm": 1.0527211827175205, "learning_rate": 1.2191882046412993e-07, "loss": 0.3801, "step": 4861 }, { "epoch": 0.7844465956760245, "grad_norm": 1.743838703811651, "learning_rate": 1.2174429819150928e-07, "loss": 0.5241, "step": 4862 }, { "epoch": 0.7846079380445304, "grad_norm": 1.5332373354065476, "learning_rate": 1.2156988360250075e-07, "loss": 0.3716, "step": 4863 }, { "epoch": 0.7847692804130365, "grad_norm": 1.533934672584752, "learning_rate": 1.213955767467577e-07, "loss": 0.4324, "step": 4864 }, { "epoch": 0.7849306227815425, "grad_norm": 1.7321185305520623, "learning_rate": 1.2122137767390274e-07, "loss": 0.4123, "step": 4865 }, { "epoch": 0.7850919651500484, "grad_norm": 2.1853280320086754, "learning_rate": 1.2104728643352736e-07, "loss": 0.5552, "step": 4866 }, { "epoch": 0.7852533075185544, "grad_norm": 1.877139079303096, "learning_rate": 1.20873303075193e-07, "loss": 0.4606, "step": 4867 }, { "epoch": 0.7854146498870603, "grad_norm": 1.4988095009695306, "learning_rate": 1.2069942764842982e-07, "loss": 0.4613, "step": 4868 }, { "epoch": 0.7855759922555663, "grad_norm": 1.512977096091788, "learning_rate": 1.205256602027376e-07, "loss": 0.5145, "step": 4869 }, { "epoch": 0.7857373346240722, "grad_norm": 1.7746239948951006, "learning_rate": 1.2035200078758518e-07, "loss": 0.5635, "step": 4870 }, { "epoch": 0.7858986769925782, "grad_norm": 1.6295347030563558, "learning_rate": 1.2017844945241085e-07, "loss": 0.4864, "step": 4871 }, { "epoch": 0.7860600193610843, "grad_norm": 1.314848796361078, "learning_rate": 1.2000500624662218e-07, "loss": 0.4245, "step": 4872 }, { "epoch": 0.7862213617295902, "grad_norm": 1.9176726879956083, "learning_rate": 1.1983167121959542e-07, "loss": 0.5279, "step": 4873 }, { "epoch": 0.7863827040980962, "grad_norm": 1.2300724420691804, "learning_rate": 1.1965844442067663e-07, "loss": 0.5301, "step": 4874 }, { "epoch": 0.7865440464666021, "grad_norm": 1.6369674602902753, "learning_rate": 1.1948532589918093e-07, "loss": 0.6292, "step": 4875 }, { "epoch": 0.7867053888351081, "grad_norm": 1.709777996531806, "learning_rate": 1.193123157043922e-07, "loss": 0.5009, "step": 4876 }, { "epoch": 0.7868667312036141, "grad_norm": 2.515746309186898, "learning_rate": 1.1913941388556403e-07, "loss": 0.7047, "step": 4877 }, { "epoch": 0.78702807357212, "grad_norm": 2.09912376288284, "learning_rate": 1.1896662049191897e-07, "loss": 0.4431, "step": 4878 }, { "epoch": 0.787189415940626, "grad_norm": 1.07531565755809, "learning_rate": 1.1879393557264844e-07, "loss": 0.3678, "step": 4879 }, { "epoch": 0.7873507583091319, "grad_norm": 1.5872202228703716, "learning_rate": 1.186213591769134e-07, "loss": 0.4523, "step": 4880 }, { "epoch": 0.787512100677638, "grad_norm": 1.4290616692916622, "learning_rate": 1.184488913538435e-07, "loss": 0.5014, "step": 4881 }, { "epoch": 0.787673443046144, "grad_norm": 1.6834255054783736, "learning_rate": 1.182765321525378e-07, "loss": 0.4814, "step": 4882 }, { "epoch": 0.7878347854146499, "grad_norm": 1.6148349770952208, "learning_rate": 1.1810428162206437e-07, "loss": 0.5419, "step": 4883 }, { "epoch": 0.7879961277831559, "grad_norm": 2.0019525295349725, "learning_rate": 1.1793213981146021e-07, "loss": 0.5196, "step": 4884 }, { "epoch": 0.7881574701516618, "grad_norm": 1.8246353621235982, "learning_rate": 1.1776010676973169e-07, "loss": 0.434, "step": 4885 }, { "epoch": 0.7883188125201678, "grad_norm": 1.7275905396601907, "learning_rate": 1.1758818254585367e-07, "loss": 0.5008, "step": 4886 }, { "epoch": 0.7884801548886737, "grad_norm": 1.7748424997433614, "learning_rate": 1.1741636718877051e-07, "loss": 0.4252, "step": 4887 }, { "epoch": 0.7886414972571797, "grad_norm": 1.5837108596987284, "learning_rate": 1.1724466074739553e-07, "loss": 0.5568, "step": 4888 }, { "epoch": 0.7888028396256858, "grad_norm": 1.926339399378495, "learning_rate": 1.1707306327061061e-07, "loss": 0.5052, "step": 4889 }, { "epoch": 0.7889641819941917, "grad_norm": 1.8390834395546176, "learning_rate": 1.1690157480726715e-07, "loss": 0.5754, "step": 4890 }, { "epoch": 0.7891255243626977, "grad_norm": 2.053947982857448, "learning_rate": 1.1673019540618539e-07, "loss": 0.5213, "step": 4891 }, { "epoch": 0.7892868667312036, "grad_norm": 1.5476832734569288, "learning_rate": 1.1655892511615412e-07, "loss": 0.3989, "step": 4892 }, { "epoch": 0.7894482090997096, "grad_norm": 1.4930255394020435, "learning_rate": 1.1638776398593152e-07, "loss": 0.466, "step": 4893 }, { "epoch": 0.7896095514682155, "grad_norm": 1.5911748688874225, "learning_rate": 1.162167120642446e-07, "loss": 0.312, "step": 4894 }, { "epoch": 0.7897708938367215, "grad_norm": 1.983771945901685, "learning_rate": 1.1604576939978932e-07, "loss": 0.6019, "step": 4895 }, { "epoch": 0.7899322362052275, "grad_norm": 1.6289841155325078, "learning_rate": 1.1587493604123017e-07, "loss": 0.4687, "step": 4896 }, { "epoch": 0.7900935785737334, "grad_norm": 1.3993298544508053, "learning_rate": 1.1570421203720088e-07, "loss": 0.4299, "step": 4897 }, { "epoch": 0.7902549209422395, "grad_norm": 1.6526897239559013, "learning_rate": 1.155335974363042e-07, "loss": 0.5334, "step": 4898 }, { "epoch": 0.7904162633107454, "grad_norm": 1.3671422896403234, "learning_rate": 1.1536309228711116e-07, "loss": 0.3813, "step": 4899 }, { "epoch": 0.7905776056792514, "grad_norm": 1.2653529027742676, "learning_rate": 1.1519269663816217e-07, "loss": 0.4703, "step": 4900 }, { "epoch": 0.7907389480477574, "grad_norm": 1.6272586384536785, "learning_rate": 1.150224105379663e-07, "loss": 0.3928, "step": 4901 }, { "epoch": 0.7909002904162633, "grad_norm": 1.7556653281523555, "learning_rate": 1.1485223403500117e-07, "loss": 0.4851, "step": 4902 }, { "epoch": 0.7910616327847693, "grad_norm": 1.3264075001092728, "learning_rate": 1.1468216717771362e-07, "loss": 0.3864, "step": 4903 }, { "epoch": 0.7912229751532752, "grad_norm": 1.5030393802105602, "learning_rate": 1.145122100145191e-07, "loss": 0.5088, "step": 4904 }, { "epoch": 0.7913843175217812, "grad_norm": 1.3388588624746875, "learning_rate": 1.1434236259380154e-07, "loss": 0.3023, "step": 4905 }, { "epoch": 0.7915456598902871, "grad_norm": 1.8586550689241472, "learning_rate": 1.1417262496391433e-07, "loss": 0.4476, "step": 4906 }, { "epoch": 0.7917070022587932, "grad_norm": 1.639433655294878, "learning_rate": 1.1400299717317884e-07, "loss": 0.5, "step": 4907 }, { "epoch": 0.7918683446272992, "grad_norm": 1.8240743902621754, "learning_rate": 1.138334792698858e-07, "loss": 0.5252, "step": 4908 }, { "epoch": 0.7920296869958051, "grad_norm": 1.7723182137354807, "learning_rate": 1.13664071302294e-07, "loss": 0.3582, "step": 4909 }, { "epoch": 0.7921910293643111, "grad_norm": 2.763531684689708, "learning_rate": 1.134947733186315e-07, "loss": 0.7741, "step": 4910 }, { "epoch": 0.792352371732817, "grad_norm": 2.086153854979716, "learning_rate": 1.1332558536709497e-07, "loss": 0.3164, "step": 4911 }, { "epoch": 0.792513714101323, "grad_norm": 1.586411353668236, "learning_rate": 1.1315650749584938e-07, "loss": 0.5492, "step": 4912 }, { "epoch": 0.7926750564698289, "grad_norm": 1.4592222047988117, "learning_rate": 1.1298753975302872e-07, "loss": 0.4396, "step": 4913 }, { "epoch": 0.7928363988383349, "grad_norm": 1.6216906080736266, "learning_rate": 1.1281868218673562e-07, "loss": 0.4061, "step": 4914 }, { "epoch": 0.792997741206841, "grad_norm": 1.265753609986858, "learning_rate": 1.1264993484504104e-07, "loss": 0.4519, "step": 4915 }, { "epoch": 0.7931590835753469, "grad_norm": 1.632429744266074, "learning_rate": 1.1248129777598491e-07, "loss": 0.5645, "step": 4916 }, { "epoch": 0.7933204259438529, "grad_norm": 1.9446327216714818, "learning_rate": 1.1231277102757558e-07, "loss": 0.5731, "step": 4917 }, { "epoch": 0.7934817683123588, "grad_norm": 1.447452013211888, "learning_rate": 1.1214435464779003e-07, "loss": 0.342, "step": 4918 }, { "epoch": 0.7936431106808648, "grad_norm": 1.4674581162358584, "learning_rate": 1.11976048684574e-07, "loss": 0.5227, "step": 4919 }, { "epoch": 0.7938044530493708, "grad_norm": 1.3423468338668187, "learning_rate": 1.1180785318584135e-07, "loss": 0.4747, "step": 4920 }, { "epoch": 0.7939657954178767, "grad_norm": 1.6776014992583075, "learning_rate": 1.1163976819947502e-07, "loss": 0.4693, "step": 4921 }, { "epoch": 0.7941271377863827, "grad_norm": 1.7167328528573786, "learning_rate": 1.1147179377332594e-07, "loss": 0.4777, "step": 4922 }, { "epoch": 0.7942884801548886, "grad_norm": 1.3812745156852708, "learning_rate": 1.113039299552141e-07, "loss": 0.4309, "step": 4923 }, { "epoch": 0.7944498225233947, "grad_norm": 1.5908351497063609, "learning_rate": 1.1113617679292775e-07, "loss": 0.5228, "step": 4924 }, { "epoch": 0.7946111648919006, "grad_norm": 1.7018248468770933, "learning_rate": 1.1096853433422343e-07, "loss": 0.5005, "step": 4925 }, { "epoch": 0.7947725072604066, "grad_norm": 1.2727031813855638, "learning_rate": 1.1080100262682662e-07, "loss": 0.3182, "step": 4926 }, { "epoch": 0.7949338496289126, "grad_norm": 2.4567104306025858, "learning_rate": 1.1063358171843102e-07, "loss": 0.4515, "step": 4927 }, { "epoch": 0.7950951919974185, "grad_norm": 1.574659413645355, "learning_rate": 1.1046627165669847e-07, "loss": 0.5416, "step": 4928 }, { "epoch": 0.7952565343659245, "grad_norm": 1.5953332852943967, "learning_rate": 1.1029907248926012e-07, "loss": 0.4381, "step": 4929 }, { "epoch": 0.7954178767344304, "grad_norm": 2.2890949963030387, "learning_rate": 1.1013198426371467e-07, "loss": 0.4652, "step": 4930 }, { "epoch": 0.7955792191029364, "grad_norm": 1.2983762026570338, "learning_rate": 1.0996500702762956e-07, "loss": 0.3512, "step": 4931 }, { "epoch": 0.7957405614714425, "grad_norm": 1.7240903252573503, "learning_rate": 1.0979814082854094e-07, "loss": 0.4802, "step": 4932 }, { "epoch": 0.7959019038399484, "grad_norm": 1.5375789715094084, "learning_rate": 1.0963138571395275e-07, "loss": 0.4483, "step": 4933 }, { "epoch": 0.7960632462084544, "grad_norm": 2.2115124066199106, "learning_rate": 1.094647417313379e-07, "loss": 0.6313, "step": 4934 }, { "epoch": 0.7962245885769603, "grad_norm": 1.9533348275961167, "learning_rate": 1.0929820892813707e-07, "loss": 0.5639, "step": 4935 }, { "epoch": 0.7963859309454663, "grad_norm": 1.2622765409414678, "learning_rate": 1.091317873517597e-07, "loss": 0.4476, "step": 4936 }, { "epoch": 0.7965472733139722, "grad_norm": 1.7909256341554765, "learning_rate": 1.0896547704958375e-07, "loss": 0.464, "step": 4937 }, { "epoch": 0.7967086156824782, "grad_norm": 1.490516086014249, "learning_rate": 1.0879927806895483e-07, "loss": 0.3918, "step": 4938 }, { "epoch": 0.7968699580509842, "grad_norm": 1.72815372105163, "learning_rate": 1.0863319045718739e-07, "loss": 0.5393, "step": 4939 }, { "epoch": 0.7970313004194901, "grad_norm": 2.004388762267124, "learning_rate": 1.0846721426156408e-07, "loss": 0.5391, "step": 4940 }, { "epoch": 0.7971926427879962, "grad_norm": 1.801885634439547, "learning_rate": 1.0830134952933572e-07, "loss": 0.5718, "step": 4941 }, { "epoch": 0.7973539851565021, "grad_norm": 1.5102423819476247, "learning_rate": 1.0813559630772174e-07, "loss": 0.5981, "step": 4942 }, { "epoch": 0.7975153275250081, "grad_norm": 1.4239601524060492, "learning_rate": 1.0796995464390907e-07, "loss": 0.4865, "step": 4943 }, { "epoch": 0.797676669893514, "grad_norm": 1.8520931336608817, "learning_rate": 1.0780442458505362e-07, "loss": 0.4416, "step": 4944 }, { "epoch": 0.79783801226202, "grad_norm": 1.2482178381411866, "learning_rate": 1.0763900617827943e-07, "loss": 0.2534, "step": 4945 }, { "epoch": 0.797999354630526, "grad_norm": 1.534781140807054, "learning_rate": 1.0747369947067819e-07, "loss": 0.4153, "step": 4946 }, { "epoch": 0.7981606969990319, "grad_norm": 1.4003172446887544, "learning_rate": 1.0730850450931061e-07, "loss": 0.4735, "step": 4947 }, { "epoch": 0.7983220393675379, "grad_norm": 1.4959696825341753, "learning_rate": 1.071434213412048e-07, "loss": 0.5219, "step": 4948 }, { "epoch": 0.7984833817360439, "grad_norm": 1.0182985182979787, "learning_rate": 1.0697845001335759e-07, "loss": 0.2996, "step": 4949 }, { "epoch": 0.7986447241045499, "grad_norm": 1.7461115688976487, "learning_rate": 1.0681359057273388e-07, "loss": 0.6182, "step": 4950 }, { "epoch": 0.7988060664730559, "grad_norm": 1.5118337976803111, "learning_rate": 1.0664884306626626e-07, "loss": 0.4017, "step": 4951 }, { "epoch": 0.7989674088415618, "grad_norm": 2.001583187999911, "learning_rate": 1.0648420754085641e-07, "loss": 0.4394, "step": 4952 }, { "epoch": 0.7991287512100678, "grad_norm": 1.1670615458926086, "learning_rate": 1.0631968404337304e-07, "loss": 0.4624, "step": 4953 }, { "epoch": 0.7992900935785737, "grad_norm": 2.2951732741290725, "learning_rate": 1.0615527262065371e-07, "loss": 0.471, "step": 4954 }, { "epoch": 0.7994514359470797, "grad_norm": 1.057425329218938, "learning_rate": 1.059909733195039e-07, "loss": 0.3964, "step": 4955 }, { "epoch": 0.7996127783155856, "grad_norm": 1.7935236984146328, "learning_rate": 1.0582678618669688e-07, "loss": 0.6126, "step": 4956 }, { "epoch": 0.7997741206840916, "grad_norm": 2.1235239849204604, "learning_rate": 1.0566271126897435e-07, "loss": 0.4265, "step": 4957 }, { "epoch": 0.7999354630525977, "grad_norm": 1.872048025142828, "learning_rate": 1.0549874861304603e-07, "loss": 0.4784, "step": 4958 }, { "epoch": 0.8000968054211036, "grad_norm": 1.1704403042242537, "learning_rate": 1.0533489826558928e-07, "loss": 0.4084, "step": 4959 }, { "epoch": 0.8002581477896096, "grad_norm": 1.641912264130583, "learning_rate": 1.0517116027325018e-07, "loss": 0.3518, "step": 4960 }, { "epoch": 0.8004194901581155, "grad_norm": 1.6369259505118448, "learning_rate": 1.0500753468264206e-07, "loss": 0.4559, "step": 4961 }, { "epoch": 0.8005808325266215, "grad_norm": 1.2999734747454692, "learning_rate": 1.0484402154034661e-07, "loss": 0.4942, "step": 4962 }, { "epoch": 0.8007421748951274, "grad_norm": 1.2527219699760475, "learning_rate": 1.0468062089291402e-07, "loss": 0.3136, "step": 4963 }, { "epoch": 0.8009035172636334, "grad_norm": 2.011614217171878, "learning_rate": 1.0451733278686148e-07, "loss": 0.5922, "step": 4964 }, { "epoch": 0.8010648596321394, "grad_norm": 1.3561051163876678, "learning_rate": 1.0435415726867492e-07, "loss": 0.4053, "step": 4965 }, { "epoch": 0.8012262020006453, "grad_norm": 1.637079968344218, "learning_rate": 1.041910943848076e-07, "loss": 0.5188, "step": 4966 }, { "epoch": 0.8013875443691514, "grad_norm": 1.6288477020353318, "learning_rate": 1.0402814418168121e-07, "loss": 0.4842, "step": 4967 }, { "epoch": 0.8015488867376573, "grad_norm": 1.0765866347488022, "learning_rate": 1.0386530670568527e-07, "loss": 0.4848, "step": 4968 }, { "epoch": 0.8017102291061633, "grad_norm": 2.0535942621324046, "learning_rate": 1.0370258200317688e-07, "loss": 0.6445, "step": 4969 }, { "epoch": 0.8018715714746693, "grad_norm": 1.2190181730572751, "learning_rate": 1.0353997012048138e-07, "loss": 0.4387, "step": 4970 }, { "epoch": 0.8020329138431752, "grad_norm": 1.164765932693456, "learning_rate": 1.0337747110389211e-07, "loss": 0.4261, "step": 4971 }, { "epoch": 0.8021942562116812, "grad_norm": 1.6457856167342664, "learning_rate": 1.0321508499966964e-07, "loss": 0.4908, "step": 4972 }, { "epoch": 0.8023555985801871, "grad_norm": 1.2598243877101534, "learning_rate": 1.0305281185404318e-07, "loss": 0.3268, "step": 4973 }, { "epoch": 0.8025169409486931, "grad_norm": 1.5276518606064522, "learning_rate": 1.0289065171320904e-07, "loss": 0.4627, "step": 4974 }, { "epoch": 0.8026782833171991, "grad_norm": 1.7754075455567446, "learning_rate": 1.0272860462333205e-07, "loss": 0.4557, "step": 4975 }, { "epoch": 0.8028396256857051, "grad_norm": 1.5908339507453297, "learning_rate": 1.0256667063054452e-07, "loss": 0.5172, "step": 4976 }, { "epoch": 0.8030009680542111, "grad_norm": 1.1248318758482334, "learning_rate": 1.0240484978094638e-07, "loss": 0.5182, "step": 4977 }, { "epoch": 0.803162310422717, "grad_norm": 1.3326413723382056, "learning_rate": 1.0224314212060587e-07, "loss": 0.4228, "step": 4978 }, { "epoch": 0.803323652791223, "grad_norm": 1.6685702737899413, "learning_rate": 1.0208154769555827e-07, "loss": 0.534, "step": 4979 }, { "epoch": 0.8034849951597289, "grad_norm": 1.984830428598813, "learning_rate": 1.0192006655180729e-07, "loss": 0.7472, "step": 4980 }, { "epoch": 0.8036463375282349, "grad_norm": 1.443998987435279, "learning_rate": 1.0175869873532423e-07, "loss": 0.4527, "step": 4981 }, { "epoch": 0.8038076798967408, "grad_norm": 1.5499534415358918, "learning_rate": 1.0159744429204775e-07, "loss": 0.445, "step": 4982 }, { "epoch": 0.8039690222652468, "grad_norm": 2.7628663523516717, "learning_rate": 1.0143630326788471e-07, "loss": 0.5175, "step": 4983 }, { "epoch": 0.8041303646337529, "grad_norm": 1.7852507384688268, "learning_rate": 1.0127527570870959e-07, "loss": 0.6165, "step": 4984 }, { "epoch": 0.8042917070022588, "grad_norm": 2.1354241223709085, "learning_rate": 1.0111436166036413e-07, "loss": 0.5904, "step": 4985 }, { "epoch": 0.8044530493707648, "grad_norm": 1.7046022920091153, "learning_rate": 1.0095356116865855e-07, "loss": 0.5381, "step": 4986 }, { "epoch": 0.8046143917392707, "grad_norm": 1.2663891216345824, "learning_rate": 1.0079287427936994e-07, "loss": 0.6197, "step": 4987 }, { "epoch": 0.8047757341077767, "grad_norm": 1.254166287942789, "learning_rate": 1.0063230103824366e-07, "loss": 0.5918, "step": 4988 }, { "epoch": 0.8049370764762827, "grad_norm": 1.549351184164897, "learning_rate": 1.004718414909922e-07, "loss": 0.4945, "step": 4989 }, { "epoch": 0.8050984188447886, "grad_norm": 1.8101126634719922, "learning_rate": 1.00311495683296e-07, "loss": 0.5041, "step": 4990 }, { "epoch": 0.8052597612132946, "grad_norm": 1.6111506323881548, "learning_rate": 1.0015126366080329e-07, "loss": 0.5226, "step": 4991 }, { "epoch": 0.8054211035818006, "grad_norm": 1.7995186638994363, "learning_rate": 9.99911454691294e-08, "loss": 0.4225, "step": 4992 }, { "epoch": 0.8055824459503066, "grad_norm": 1.7758045280599022, "learning_rate": 9.98311411538576e-08, "loss": 0.4904, "step": 4993 }, { "epoch": 0.8057437883188125, "grad_norm": 1.4223495362256402, "learning_rate": 9.967125076053884e-08, "loss": 0.3867, "step": 4994 }, { "epoch": 0.8059051306873185, "grad_norm": 1.7104234315070108, "learning_rate": 9.951147433469114e-08, "loss": 0.5407, "step": 4995 }, { "epoch": 0.8060664730558245, "grad_norm": 1.831017122010491, "learning_rate": 9.93518119218007e-08, "loss": 0.4171, "step": 4996 }, { "epoch": 0.8062278154243304, "grad_norm": 1.9872494401680227, "learning_rate": 9.91922635673208e-08, "loss": 0.5889, "step": 4997 }, { "epoch": 0.8063891577928364, "grad_norm": 1.5228074556044446, "learning_rate": 9.903282931667245e-08, "loss": 0.3666, "step": 4998 }, { "epoch": 0.8065505001613423, "grad_norm": 1.1630573413321823, "learning_rate": 9.887350921524434e-08, "loss": 0.3968, "step": 4999 }, { "epoch": 0.8067118425298483, "grad_norm": 2.04902349800573, "learning_rate": 9.87143033083922e-08, "loss": 0.3995, "step": 5000 }, { "epoch": 0.8068731848983544, "grad_norm": 2.409778732335888, "learning_rate": 9.85552116414397e-08, "loss": 0.4946, "step": 5001 }, { "epoch": 0.8070345272668603, "grad_norm": 1.6389604389427195, "learning_rate": 9.839623425967758e-08, "loss": 0.5266, "step": 5002 }, { "epoch": 0.8071958696353663, "grad_norm": 1.4408856203745657, "learning_rate": 9.823737120836444e-08, "loss": 0.4451, "step": 5003 }, { "epoch": 0.8073572120038722, "grad_norm": 1.8321817494861214, "learning_rate": 9.80786225327262e-08, "loss": 0.7822, "step": 5004 }, { "epoch": 0.8075185543723782, "grad_norm": 1.6189477895431723, "learning_rate": 9.791998827795606e-08, "loss": 0.4687, "step": 5005 }, { "epoch": 0.8076798967408841, "grad_norm": 1.5687076851658543, "learning_rate": 9.776146848921473e-08, "loss": 0.5182, "step": 5006 }, { "epoch": 0.8078412391093901, "grad_norm": 1.6148659079424414, "learning_rate": 9.760306321163065e-08, "loss": 0.329, "step": 5007 }, { "epoch": 0.808002581477896, "grad_norm": 1.6443306626157004, "learning_rate": 9.744477249029887e-08, "loss": 0.5545, "step": 5008 }, { "epoch": 0.808163923846402, "grad_norm": 1.5935904479102454, "learning_rate": 9.728659637028291e-08, "loss": 0.5676, "step": 5009 }, { "epoch": 0.8083252662149081, "grad_norm": 1.6108798056482176, "learning_rate": 9.712853489661271e-08, "loss": 0.4503, "step": 5010 }, { "epoch": 0.808486608583414, "grad_norm": 1.696063459873543, "learning_rate": 9.69705881142861e-08, "loss": 0.4545, "step": 5011 }, { "epoch": 0.80864795095192, "grad_norm": 1.7386461882106838, "learning_rate": 9.681275606826817e-08, "loss": 0.6045, "step": 5012 }, { "epoch": 0.808809293320426, "grad_norm": 1.4570148865954466, "learning_rate": 9.665503880349108e-08, "loss": 0.3473, "step": 5013 }, { "epoch": 0.8089706356889319, "grad_norm": 1.6695148529689867, "learning_rate": 9.64974363648548e-08, "loss": 0.4591, "step": 5014 }, { "epoch": 0.8091319780574379, "grad_norm": 2.1809643796157214, "learning_rate": 9.633994879722607e-08, "loss": 0.6271, "step": 5015 }, { "epoch": 0.8092933204259438, "grad_norm": 1.223862220832249, "learning_rate": 9.618257614543923e-08, "loss": 0.5234, "step": 5016 }, { "epoch": 0.8094546627944498, "grad_norm": 1.188378310393448, "learning_rate": 9.602531845429612e-08, "loss": 0.2796, "step": 5017 }, { "epoch": 0.8096160051629558, "grad_norm": 1.6225785039827547, "learning_rate": 9.586817576856532e-08, "loss": 0.5038, "step": 5018 }, { "epoch": 0.8097773475314618, "grad_norm": 1.6627067093227257, "learning_rate": 9.571114813298298e-08, "loss": 0.458, "step": 5019 }, { "epoch": 0.8099386898999678, "grad_norm": 2.2736432483814704, "learning_rate": 9.555423559225256e-08, "loss": 0.3767, "step": 5020 }, { "epoch": 0.8101000322684737, "grad_norm": 2.151525435339674, "learning_rate": 9.539743819104467e-08, "loss": 0.4661, "step": 5021 }, { "epoch": 0.8102613746369797, "grad_norm": 1.8940879696453834, "learning_rate": 9.524075597399716e-08, "loss": 0.5789, "step": 5022 }, { "epoch": 0.8104227170054856, "grad_norm": 1.0617111306832874, "learning_rate": 9.508418898571491e-08, "loss": 0.4714, "step": 5023 }, { "epoch": 0.8105840593739916, "grad_norm": 1.5744047220717536, "learning_rate": 9.492773727077024e-08, "loss": 0.4649, "step": 5024 }, { "epoch": 0.8107454017424975, "grad_norm": 1.2726018773424927, "learning_rate": 9.477140087370267e-08, "loss": 0.4776, "step": 5025 }, { "epoch": 0.8109067441110035, "grad_norm": 1.6342631197795632, "learning_rate": 9.461517983901851e-08, "loss": 0.5059, "step": 5026 }, { "epoch": 0.8110680864795096, "grad_norm": 1.112501333000156, "learning_rate": 9.445907421119181e-08, "loss": 0.2905, "step": 5027 }, { "epoch": 0.8112294288480155, "grad_norm": 1.5640679693745685, "learning_rate": 9.430308403466314e-08, "loss": 0.5424, "step": 5028 }, { "epoch": 0.8113907712165215, "grad_norm": 1.5662959718769451, "learning_rate": 9.414720935384069e-08, "loss": 0.5369, "step": 5029 }, { "epoch": 0.8115521135850274, "grad_norm": 1.6450593150381831, "learning_rate": 9.399145021309973e-08, "loss": 0.5004, "step": 5030 }, { "epoch": 0.8117134559535334, "grad_norm": 1.3585039614751997, "learning_rate": 9.383580665678203e-08, "loss": 0.4309, "step": 5031 }, { "epoch": 0.8118747983220393, "grad_norm": 1.5747289848275887, "learning_rate": 9.368027872919759e-08, "loss": 0.3175, "step": 5032 }, { "epoch": 0.8120361406905453, "grad_norm": 1.43618307742917, "learning_rate": 9.352486647462233e-08, "loss": 0.6361, "step": 5033 }, { "epoch": 0.8121974830590513, "grad_norm": 1.7880685368956382, "learning_rate": 9.336956993729994e-08, "loss": 0.4351, "step": 5034 }, { "epoch": 0.8123588254275573, "grad_norm": 1.2545648194189318, "learning_rate": 9.321438916144104e-08, "loss": 0.4949, "step": 5035 }, { "epoch": 0.8125201677960633, "grad_norm": 1.838754578564595, "learning_rate": 9.305932419122298e-08, "loss": 0.7026, "step": 5036 }, { "epoch": 0.8126815101645692, "grad_norm": 1.7414068003842291, "learning_rate": 9.290437507079052e-08, "loss": 0.417, "step": 5037 }, { "epoch": 0.8128428525330752, "grad_norm": 1.4241257344735125, "learning_rate": 9.274954184425548e-08, "loss": 0.3714, "step": 5038 }, { "epoch": 0.8130041949015812, "grad_norm": 1.3348001468381723, "learning_rate": 9.259482455569618e-08, "loss": 0.4352, "step": 5039 }, { "epoch": 0.8131655372700871, "grad_norm": 1.3791151455862782, "learning_rate": 9.244022324915857e-08, "loss": 0.3501, "step": 5040 }, { "epoch": 0.8133268796385931, "grad_norm": 1.495849828531739, "learning_rate": 9.228573796865502e-08, "loss": 0.333, "step": 5041 }, { "epoch": 0.813488222007099, "grad_norm": 1.5925128099055075, "learning_rate": 9.213136875816525e-08, "loss": 0.4807, "step": 5042 }, { "epoch": 0.813649564375605, "grad_norm": 1.6289331813571029, "learning_rate": 9.197711566163574e-08, "loss": 0.5362, "step": 5043 }, { "epoch": 0.813810906744111, "grad_norm": 1.8136794100589895, "learning_rate": 9.182297872298012e-08, "loss": 0.5649, "step": 5044 }, { "epoch": 0.813972249112617, "grad_norm": 1.5360379004819247, "learning_rate": 9.166895798607882e-08, "loss": 0.5161, "step": 5045 }, { "epoch": 0.814133591481123, "grad_norm": 1.8142940587022438, "learning_rate": 9.1515053494779e-08, "loss": 0.5346, "step": 5046 }, { "epoch": 0.8142949338496289, "grad_norm": 1.6840028907361444, "learning_rate": 9.136126529289506e-08, "loss": 0.4843, "step": 5047 }, { "epoch": 0.8144562762181349, "grad_norm": 1.6942099224797478, "learning_rate": 9.120759342420819e-08, "loss": 0.5853, "step": 5048 }, { "epoch": 0.8146176185866408, "grad_norm": 1.666571932325525, "learning_rate": 9.10540379324663e-08, "loss": 0.5067, "step": 5049 }, { "epoch": 0.8147789609551468, "grad_norm": 1.399860276334273, "learning_rate": 9.090059886138429e-08, "loss": 0.3664, "step": 5050 }, { "epoch": 0.8149403033236527, "grad_norm": 1.8384380437450392, "learning_rate": 9.074727625464412e-08, "loss": 0.6493, "step": 5051 }, { "epoch": 0.8151016456921588, "grad_norm": 1.46968422255251, "learning_rate": 9.05940701558941e-08, "loss": 0.4737, "step": 5052 }, { "epoch": 0.8152629880606648, "grad_norm": 1.6535498723899915, "learning_rate": 9.044098060874989e-08, "loss": 0.4679, "step": 5053 }, { "epoch": 0.8154243304291707, "grad_norm": 1.9714897105804434, "learning_rate": 9.028800765679346e-08, "loss": 0.4435, "step": 5054 }, { "epoch": 0.8155856727976767, "grad_norm": 2.1054066947893624, "learning_rate": 9.013515134357414e-08, "loss": 0.4505, "step": 5055 }, { "epoch": 0.8157470151661826, "grad_norm": 1.4960839492835396, "learning_rate": 8.998241171260784e-08, "loss": 0.5526, "step": 5056 }, { "epoch": 0.8159083575346886, "grad_norm": 1.5698189790202213, "learning_rate": 8.982978880737696e-08, "loss": 0.5815, "step": 5057 }, { "epoch": 0.8160696999031946, "grad_norm": 1.3862836128198044, "learning_rate": 8.967728267133112e-08, "loss": 0.5391, "step": 5058 }, { "epoch": 0.8162310422717005, "grad_norm": 1.8497436861378223, "learning_rate": 8.952489334788621e-08, "loss": 0.4677, "step": 5059 }, { "epoch": 0.8163923846402065, "grad_norm": 2.851032197318881, "learning_rate": 8.937262088042536e-08, "loss": 0.6208, "step": 5060 }, { "epoch": 0.8165537270087125, "grad_norm": 1.1486293606510845, "learning_rate": 8.922046531229826e-08, "loss": 0.319, "step": 5061 }, { "epoch": 0.8167150693772185, "grad_norm": 2.026733659444295, "learning_rate": 8.906842668682102e-08, "loss": 0.6056, "step": 5062 }, { "epoch": 0.8168764117457245, "grad_norm": 1.4200792815682268, "learning_rate": 8.891650504727688e-08, "loss": 0.4111, "step": 5063 }, { "epoch": 0.8170377541142304, "grad_norm": 2.0354793240896325, "learning_rate": 8.876470043691575e-08, "loss": 0.4898, "step": 5064 }, { "epoch": 0.8171990964827364, "grad_norm": 1.5581561169690354, "learning_rate": 8.861301289895373e-08, "loss": 0.2948, "step": 5065 }, { "epoch": 0.8173604388512423, "grad_norm": 1.9168340430223425, "learning_rate": 8.846144247657417e-08, "loss": 0.5284, "step": 5066 }, { "epoch": 0.8175217812197483, "grad_norm": 1.811285796091462, "learning_rate": 8.830998921292676e-08, "loss": 0.5942, "step": 5067 }, { "epoch": 0.8176831235882542, "grad_norm": 1.7303621763043402, "learning_rate": 8.815865315112803e-08, "loss": 0.4139, "step": 5068 }, { "epoch": 0.8178444659567602, "grad_norm": 1.7824838614569194, "learning_rate": 8.800743433426105e-08, "loss": 0.4868, "step": 5069 }, { "epoch": 0.8180058083252663, "grad_norm": 1.484917712131264, "learning_rate": 8.785633280537535e-08, "loss": 0.4788, "step": 5070 }, { "epoch": 0.8181671506937722, "grad_norm": 1.4708995105383402, "learning_rate": 8.770534860748746e-08, "loss": 0.3917, "step": 5071 }, { "epoch": 0.8183284930622782, "grad_norm": 1.6590534451816543, "learning_rate": 8.755448178357994e-08, "loss": 0.5647, "step": 5072 }, { "epoch": 0.8184898354307841, "grad_norm": 1.3617525902995375, "learning_rate": 8.740373237660254e-08, "loss": 0.3658, "step": 5073 }, { "epoch": 0.8186511777992901, "grad_norm": 1.6880891972247394, "learning_rate": 8.725310042947126e-08, "loss": 0.3602, "step": 5074 }, { "epoch": 0.818812520167796, "grad_norm": 1.32991418934544, "learning_rate": 8.710258598506864e-08, "loss": 0.3924, "step": 5075 }, { "epoch": 0.818973862536302, "grad_norm": 1.666327569161905, "learning_rate": 8.69521890862438e-08, "loss": 0.3762, "step": 5076 }, { "epoch": 0.819135204904808, "grad_norm": 1.6417170931563672, "learning_rate": 8.680190977581254e-08, "loss": 0.4118, "step": 5077 }, { "epoch": 0.819296547273314, "grad_norm": 1.625894373743923, "learning_rate": 8.665174809655707e-08, "loss": 0.477, "step": 5078 }, { "epoch": 0.81945788964182, "grad_norm": 1.4247101405295002, "learning_rate": 8.65017040912262e-08, "loss": 0.4248, "step": 5079 }, { "epoch": 0.8196192320103259, "grad_norm": 1.454503461789719, "learning_rate": 8.635177780253495e-08, "loss": 0.612, "step": 5080 }, { "epoch": 0.8197805743788319, "grad_norm": 1.585249878593841, "learning_rate": 8.62019692731652e-08, "loss": 0.4505, "step": 5081 }, { "epoch": 0.8199419167473379, "grad_norm": 1.669800656921718, "learning_rate": 8.605227854576519e-08, "loss": 0.5177, "step": 5082 }, { "epoch": 0.8201032591158438, "grad_norm": 1.2894418793944549, "learning_rate": 8.590270566294938e-08, "loss": 0.3306, "step": 5083 }, { "epoch": 0.8202646014843498, "grad_norm": 1.6341504177911281, "learning_rate": 8.575325066729915e-08, "loss": 0.5032, "step": 5084 }, { "epoch": 0.8204259438528557, "grad_norm": 1.614730516513729, "learning_rate": 8.560391360136171e-08, "loss": 0.4624, "step": 5085 }, { "epoch": 0.8205872862213617, "grad_norm": 1.7058738433594696, "learning_rate": 8.54546945076513e-08, "loss": 0.5333, "step": 5086 }, { "epoch": 0.8207486285898677, "grad_norm": 1.4848973208548986, "learning_rate": 8.530559342864834e-08, "loss": 0.4093, "step": 5087 }, { "epoch": 0.8209099709583737, "grad_norm": 1.4269562360686299, "learning_rate": 8.515661040679939e-08, "loss": 0.4948, "step": 5088 }, { "epoch": 0.8210713133268797, "grad_norm": 1.434393553104795, "learning_rate": 8.500774548451778e-08, "loss": 0.4935, "step": 5089 }, { "epoch": 0.8212326556953856, "grad_norm": 2.2199435180489697, "learning_rate": 8.485899870418312e-08, "loss": 0.5115, "step": 5090 }, { "epoch": 0.8213939980638916, "grad_norm": 1.3246894382499523, "learning_rate": 8.471037010814125e-08, "loss": 0.395, "step": 5091 }, { "epoch": 0.8215553404323975, "grad_norm": 1.5717463992468799, "learning_rate": 8.456185973870461e-08, "loss": 0.4582, "step": 5092 }, { "epoch": 0.8217166828009035, "grad_norm": 1.6094748456778158, "learning_rate": 8.441346763815161e-08, "loss": 0.5678, "step": 5093 }, { "epoch": 0.8218780251694094, "grad_norm": 1.4698895842416397, "learning_rate": 8.426519384872732e-08, "loss": 0.4542, "step": 5094 }, { "epoch": 0.8220393675379155, "grad_norm": 1.6433666754094143, "learning_rate": 8.411703841264317e-08, "loss": 0.6006, "step": 5095 }, { "epoch": 0.8222007099064215, "grad_norm": 1.7584115597045569, "learning_rate": 8.396900137207646e-08, "loss": 0.299, "step": 5096 }, { "epoch": 0.8223620522749274, "grad_norm": 1.739968087901233, "learning_rate": 8.382108276917138e-08, "loss": 0.3999, "step": 5097 }, { "epoch": 0.8225233946434334, "grad_norm": 1.4966121402966355, "learning_rate": 8.367328264603773e-08, "loss": 0.464, "step": 5098 }, { "epoch": 0.8226847370119393, "grad_norm": 1.444765637013625, "learning_rate": 8.35256010447522e-08, "loss": 0.5429, "step": 5099 }, { "epoch": 0.8228460793804453, "grad_norm": 2.2407467562954073, "learning_rate": 8.337803800735738e-08, "loss": 0.662, "step": 5100 }, { "epoch": 0.8230074217489513, "grad_norm": 1.1941166798914615, "learning_rate": 8.323059357586232e-08, "loss": 0.4724, "step": 5101 }, { "epoch": 0.8231687641174572, "grad_norm": 1.3324543569022709, "learning_rate": 8.308326779224218e-08, "loss": 0.5089, "step": 5102 }, { "epoch": 0.8233301064859632, "grad_norm": 1.4990129402116164, "learning_rate": 8.293606069843817e-08, "loss": 0.4636, "step": 5103 }, { "epoch": 0.8234914488544692, "grad_norm": 1.6145886903079185, "learning_rate": 8.278897233635806e-08, "loss": 0.3956, "step": 5104 }, { "epoch": 0.8236527912229752, "grad_norm": 1.4245421159168135, "learning_rate": 8.264200274787575e-08, "loss": 0.5516, "step": 5105 }, { "epoch": 0.8238141335914811, "grad_norm": 1.1537721735517021, "learning_rate": 8.249515197483103e-08, "loss": 0.3832, "step": 5106 }, { "epoch": 0.8239754759599871, "grad_norm": 1.3159869060966458, "learning_rate": 8.23484200590302e-08, "loss": 0.4817, "step": 5107 }, { "epoch": 0.8241368183284931, "grad_norm": 2.070328392111764, "learning_rate": 8.220180704224544e-08, "loss": 0.5444, "step": 5108 }, { "epoch": 0.824298160696999, "grad_norm": 1.4622972657626874, "learning_rate": 8.205531296621538e-08, "loss": 0.4334, "step": 5109 }, { "epoch": 0.824459503065505, "grad_norm": 2.11194312977806, "learning_rate": 8.190893787264469e-08, "loss": 0.6054, "step": 5110 }, { "epoch": 0.8246208454340109, "grad_norm": 1.5039236885149674, "learning_rate": 8.176268180320389e-08, "loss": 0.4956, "step": 5111 }, { "epoch": 0.8247821878025169, "grad_norm": 1.2298484086229668, "learning_rate": 8.161654479953001e-08, "loss": 0.461, "step": 5112 }, { "epoch": 0.824943530171023, "grad_norm": 1.3856016145555812, "learning_rate": 8.147052690322598e-08, "loss": 0.4386, "step": 5113 }, { "epoch": 0.8251048725395289, "grad_norm": 1.1892236447512354, "learning_rate": 8.132462815586083e-08, "loss": 0.3393, "step": 5114 }, { "epoch": 0.8252662149080349, "grad_norm": 1.5139236339272213, "learning_rate": 8.117884859896994e-08, "loss": 0.5552, "step": 5115 }, { "epoch": 0.8254275572765408, "grad_norm": 1.840224240881835, "learning_rate": 8.10331882740542e-08, "loss": 0.5264, "step": 5116 }, { "epoch": 0.8255888996450468, "grad_norm": 1.888891386828298, "learning_rate": 8.088764722258095e-08, "loss": 0.5819, "step": 5117 }, { "epoch": 0.8257502420135527, "grad_norm": 1.801153314180859, "learning_rate": 8.074222548598369e-08, "loss": 0.5974, "step": 5118 }, { "epoch": 0.8259115843820587, "grad_norm": 1.8120140871296164, "learning_rate": 8.059692310566151e-08, "loss": 0.4775, "step": 5119 }, { "epoch": 0.8260729267505647, "grad_norm": 1.7480963161871585, "learning_rate": 8.045174012298006e-08, "loss": 0.5898, "step": 5120 }, { "epoch": 0.8262342691190707, "grad_norm": 2.5009819009851264, "learning_rate": 8.030667657927037e-08, "loss": 0.6707, "step": 5121 }, { "epoch": 0.8263956114875767, "grad_norm": 1.7143599885790248, "learning_rate": 8.016173251583002e-08, "loss": 0.4897, "step": 5122 }, { "epoch": 0.8265569538560826, "grad_norm": 1.7332577930397943, "learning_rate": 8.001690797392235e-08, "loss": 0.4784, "step": 5123 }, { "epoch": 0.8267182962245886, "grad_norm": 2.0715455036326396, "learning_rate": 7.987220299477665e-08, "loss": 0.6892, "step": 5124 }, { "epoch": 0.8268796385930945, "grad_norm": 1.418572247499052, "learning_rate": 7.972761761958835e-08, "loss": 0.4731, "step": 5125 }, { "epoch": 0.8270409809616005, "grad_norm": 1.4722574437724567, "learning_rate": 7.958315188951848e-08, "loss": 0.4248, "step": 5126 }, { "epoch": 0.8272023233301065, "grad_norm": 1.7473657399830058, "learning_rate": 7.943880584569434e-08, "loss": 0.5269, "step": 5127 }, { "epoch": 0.8273636656986124, "grad_norm": 1.662476190955827, "learning_rate": 7.929457952920914e-08, "loss": 0.4586, "step": 5128 }, { "epoch": 0.8275250080671184, "grad_norm": 1.3432555065059912, "learning_rate": 7.915047298112171e-08, "loss": 0.4335, "step": 5129 }, { "epoch": 0.8276863504356244, "grad_norm": 1.5274089984664196, "learning_rate": 7.900648624245704e-08, "loss": 0.4126, "step": 5130 }, { "epoch": 0.8278476928041304, "grad_norm": 1.510513732246677, "learning_rate": 7.886261935420607e-08, "loss": 0.4598, "step": 5131 }, { "epoch": 0.8280090351726364, "grad_norm": 1.7796764450466707, "learning_rate": 7.871887235732533e-08, "loss": 0.5092, "step": 5132 }, { "epoch": 0.8281703775411423, "grad_norm": 1.422323302975113, "learning_rate": 7.857524529273762e-08, "loss": 0.4897, "step": 5133 }, { "epoch": 0.8283317199096483, "grad_norm": 1.7666201192493802, "learning_rate": 7.843173820133104e-08, "loss": 0.4734, "step": 5134 }, { "epoch": 0.8284930622781542, "grad_norm": 1.5121438072027076, "learning_rate": 7.828835112395998e-08, "loss": 0.3137, "step": 5135 }, { "epoch": 0.8286544046466602, "grad_norm": 1.6664155611975418, "learning_rate": 7.814508410144482e-08, "loss": 0.4716, "step": 5136 }, { "epoch": 0.8288157470151661, "grad_norm": 1.8578783952401847, "learning_rate": 7.800193717457116e-08, "loss": 0.5163, "step": 5137 }, { "epoch": 0.8289770893836722, "grad_norm": 1.3840712943952906, "learning_rate": 7.785891038409098e-08, "loss": 0.4269, "step": 5138 }, { "epoch": 0.8291384317521782, "grad_norm": 1.6748199907054129, "learning_rate": 7.771600377072162e-08, "loss": 0.3924, "step": 5139 }, { "epoch": 0.8292997741206841, "grad_norm": 1.4768127552271022, "learning_rate": 7.757321737514644e-08, "loss": 0.4612, "step": 5140 }, { "epoch": 0.8294611164891901, "grad_norm": 1.7338119142639374, "learning_rate": 7.743055123801474e-08, "loss": 0.531, "step": 5141 }, { "epoch": 0.829622458857696, "grad_norm": 1.3910018753104934, "learning_rate": 7.728800539994113e-08, "loss": 0.4701, "step": 5142 }, { "epoch": 0.829783801226202, "grad_norm": 1.6376306001255752, "learning_rate": 7.714557990150633e-08, "loss": 0.4308, "step": 5143 }, { "epoch": 0.829945143594708, "grad_norm": 2.077307174492614, "learning_rate": 7.70032747832568e-08, "loss": 0.5639, "step": 5144 }, { "epoch": 0.8301064859632139, "grad_norm": 1.7800407990055362, "learning_rate": 7.686109008570446e-08, "loss": 0.4193, "step": 5145 }, { "epoch": 0.8302678283317199, "grad_norm": 1.7166525093284934, "learning_rate": 7.671902584932716e-08, "loss": 0.5434, "step": 5146 }, { "epoch": 0.8304291707002259, "grad_norm": 1.3812788740221231, "learning_rate": 7.657708211456848e-08, "loss": 0.4418, "step": 5147 }, { "epoch": 0.8305905130687319, "grad_norm": 1.5724980721514943, "learning_rate": 7.643525892183761e-08, "loss": 0.3294, "step": 5148 }, { "epoch": 0.8307518554372378, "grad_norm": 1.8884068237014948, "learning_rate": 7.629355631150952e-08, "loss": 0.5497, "step": 5149 }, { "epoch": 0.8309131978057438, "grad_norm": 1.3947252838466517, "learning_rate": 7.615197432392462e-08, "loss": 0.3325, "step": 5150 }, { "epoch": 0.8310745401742498, "grad_norm": 1.909252990720314, "learning_rate": 7.601051299938926e-08, "loss": 0.5813, "step": 5151 }, { "epoch": 0.8312358825427557, "grad_norm": 1.449222204816047, "learning_rate": 7.586917237817519e-08, "loss": 0.4099, "step": 5152 }, { "epoch": 0.8313972249112617, "grad_norm": 1.6337706752364896, "learning_rate": 7.572795250052e-08, "loss": 0.5695, "step": 5153 }, { "epoch": 0.8315585672797676, "grad_norm": 1.3975905512235418, "learning_rate": 7.558685340662696e-08, "loss": 0.3723, "step": 5154 }, { "epoch": 0.8317199096482737, "grad_norm": 1.400728490870959, "learning_rate": 7.544587513666457e-08, "loss": 0.439, "step": 5155 }, { "epoch": 0.8318812520167796, "grad_norm": 1.6391140474226844, "learning_rate": 7.530501773076725e-08, "loss": 0.5111, "step": 5156 }, { "epoch": 0.8320425943852856, "grad_norm": 1.5814783707911637, "learning_rate": 7.516428122903518e-08, "loss": 0.5804, "step": 5157 }, { "epoch": 0.8322039367537916, "grad_norm": 1.7171760287956035, "learning_rate": 7.502366567153345e-08, "loss": 0.4581, "step": 5158 }, { "epoch": 0.8323652791222975, "grad_norm": 1.397404976823286, "learning_rate": 7.488317109829367e-08, "loss": 0.3286, "step": 5159 }, { "epoch": 0.8325266214908035, "grad_norm": 2.1163784606844014, "learning_rate": 7.474279754931212e-08, "loss": 0.6138, "step": 5160 }, { "epoch": 0.8326879638593094, "grad_norm": 1.1925110244241384, "learning_rate": 7.46025450645511e-08, "loss": 0.4517, "step": 5161 }, { "epoch": 0.8328493062278154, "grad_norm": 1.221233820188203, "learning_rate": 7.446241368393858e-08, "loss": 0.4126, "step": 5162 }, { "epoch": 0.8330106485963213, "grad_norm": 1.394006454069721, "learning_rate": 7.43224034473674e-08, "loss": 0.5162, "step": 5163 }, { "epoch": 0.8331719909648274, "grad_norm": 1.9654265917662939, "learning_rate": 7.418251439469675e-08, "loss": 0.4244, "step": 5164 }, { "epoch": 0.8333333333333334, "grad_norm": 1.6307055737731937, "learning_rate": 7.404274656575055e-08, "loss": 0.5609, "step": 5165 }, { "epoch": 0.8334946757018393, "grad_norm": 1.124584492139489, "learning_rate": 7.390310000031874e-08, "loss": 0.4027, "step": 5166 }, { "epoch": 0.8336560180703453, "grad_norm": 1.8242254154973916, "learning_rate": 7.376357473815664e-08, "loss": 0.5258, "step": 5167 }, { "epoch": 0.8338173604388512, "grad_norm": 1.4473347306323119, "learning_rate": 7.36241708189847e-08, "loss": 0.3293, "step": 5168 }, { "epoch": 0.8339787028073572, "grad_norm": 1.7134844491173369, "learning_rate": 7.348488828248928e-08, "loss": 0.4993, "step": 5169 }, { "epoch": 0.8341400451758632, "grad_norm": 1.2997802915512036, "learning_rate": 7.334572716832199e-08, "loss": 0.3405, "step": 5170 }, { "epoch": 0.8343013875443691, "grad_norm": 1.6727534330484182, "learning_rate": 7.320668751609976e-08, "loss": 0.5915, "step": 5171 }, { "epoch": 0.8344627299128751, "grad_norm": 2.1260185044812014, "learning_rate": 7.306776936540526e-08, "loss": 0.7426, "step": 5172 }, { "epoch": 0.8346240722813811, "grad_norm": 2.0181499900463837, "learning_rate": 7.292897275578613e-08, "loss": 0.5126, "step": 5173 }, { "epoch": 0.8347854146498871, "grad_norm": 1.5815049037610087, "learning_rate": 7.279029772675571e-08, "loss": 0.525, "step": 5174 }, { "epoch": 0.834946757018393, "grad_norm": 1.1611778821612069, "learning_rate": 7.265174431779275e-08, "loss": 0.3153, "step": 5175 }, { "epoch": 0.835108099386899, "grad_norm": 1.5455257812442738, "learning_rate": 7.25133125683412e-08, "loss": 0.4058, "step": 5176 }, { "epoch": 0.835269441755405, "grad_norm": 3.301268917377642, "learning_rate": 7.237500251781053e-08, "loss": 0.5069, "step": 5177 }, { "epoch": 0.8354307841239109, "grad_norm": 1.3470366007073915, "learning_rate": 7.223681420557537e-08, "loss": 0.4186, "step": 5178 }, { "epoch": 0.8355921264924169, "grad_norm": 1.3563704336109026, "learning_rate": 7.209874767097585e-08, "loss": 0.2791, "step": 5179 }, { "epoch": 0.8357534688609228, "grad_norm": 1.1133431400697107, "learning_rate": 7.196080295331758e-08, "loss": 0.4139, "step": 5180 }, { "epoch": 0.8359148112294289, "grad_norm": 1.2778581056132015, "learning_rate": 7.1822980091871e-08, "loss": 0.2808, "step": 5181 }, { "epoch": 0.8360761535979349, "grad_norm": 1.7224075241622616, "learning_rate": 7.168527912587253e-08, "loss": 0.5059, "step": 5182 }, { "epoch": 0.8362374959664408, "grad_norm": 1.3216074174458081, "learning_rate": 7.154770009452321e-08, "loss": 0.4272, "step": 5183 }, { "epoch": 0.8363988383349468, "grad_norm": 1.6469735248200348, "learning_rate": 7.141024303698983e-08, "loss": 0.5035, "step": 5184 }, { "epoch": 0.8365601807034527, "grad_norm": 1.455114090238199, "learning_rate": 7.127290799240443e-08, "loss": 0.3441, "step": 5185 }, { "epoch": 0.8367215230719587, "grad_norm": 1.8650459553883516, "learning_rate": 7.1135694999864e-08, "loss": 0.5805, "step": 5186 }, { "epoch": 0.8368828654404646, "grad_norm": 1.4128250693251703, "learning_rate": 7.0998604098431e-08, "loss": 0.4115, "step": 5187 }, { "epoch": 0.8370442078089706, "grad_norm": 1.6659832109381558, "learning_rate": 7.08616353271333e-08, "loss": 0.4989, "step": 5188 }, { "epoch": 0.8372055501774766, "grad_norm": 2.153827467381458, "learning_rate": 7.072478872496351e-08, "loss": 0.5705, "step": 5189 }, { "epoch": 0.8373668925459826, "grad_norm": 2.0986178163726663, "learning_rate": 7.058806433088011e-08, "loss": 0.4453, "step": 5190 }, { "epoch": 0.8375282349144886, "grad_norm": 1.2436385406698673, "learning_rate": 7.04514621838061e-08, "loss": 0.5028, "step": 5191 }, { "epoch": 0.8376895772829945, "grad_norm": 1.6124474539068607, "learning_rate": 7.031498232263022e-08, "loss": 0.4999, "step": 5192 }, { "epoch": 0.8378509196515005, "grad_norm": 1.760790662896905, "learning_rate": 7.017862478620617e-08, "loss": 0.4807, "step": 5193 }, { "epoch": 0.8380122620200064, "grad_norm": 1.6917629790802478, "learning_rate": 7.004238961335284e-08, "loss": 0.4704, "step": 5194 }, { "epoch": 0.8381736043885124, "grad_norm": 2.509897761447406, "learning_rate": 6.990627684285439e-08, "loss": 0.8597, "step": 5195 }, { "epoch": 0.8383349467570184, "grad_norm": 2.5108968716024553, "learning_rate": 6.97702865134599e-08, "loss": 0.4845, "step": 5196 }, { "epoch": 0.8384962891255243, "grad_norm": 1.747146391558116, "learning_rate": 6.963441866388376e-08, "loss": 0.6055, "step": 5197 }, { "epoch": 0.8386576314940304, "grad_norm": 1.3387598931012024, "learning_rate": 6.949867333280567e-08, "loss": 0.3967, "step": 5198 }, { "epoch": 0.8388189738625363, "grad_norm": 1.9470713067929928, "learning_rate": 6.936305055886992e-08, "loss": 0.3906, "step": 5199 }, { "epoch": 0.8389803162310423, "grad_norm": 1.6043501992532279, "learning_rate": 6.922755038068645e-08, "loss": 0.5693, "step": 5200 }, { "epoch": 0.8391416585995483, "grad_norm": 1.7589896774466216, "learning_rate": 6.909217283683017e-08, "loss": 0.5549, "step": 5201 }, { "epoch": 0.8393030009680542, "grad_norm": 1.5636306486620108, "learning_rate": 6.895691796584069e-08, "loss": 0.4488, "step": 5202 }, { "epoch": 0.8394643433365602, "grad_norm": 1.625374457423985, "learning_rate": 6.882178580622333e-08, "loss": 0.5108, "step": 5203 }, { "epoch": 0.8396256857050661, "grad_norm": 1.2254389093363414, "learning_rate": 6.868677639644782e-08, "loss": 0.4786, "step": 5204 }, { "epoch": 0.8397870280735721, "grad_norm": 1.6054705320473686, "learning_rate": 6.855188977494963e-08, "loss": 0.4632, "step": 5205 }, { "epoch": 0.839948370442078, "grad_norm": 1.8280450526305476, "learning_rate": 6.841712598012866e-08, "loss": 0.5233, "step": 5206 }, { "epoch": 0.8401097128105841, "grad_norm": 1.9215178739384946, "learning_rate": 6.828248505035017e-08, "loss": 0.4736, "step": 5207 }, { "epoch": 0.8402710551790901, "grad_norm": 1.4955753394369695, "learning_rate": 6.814796702394449e-08, "loss": 0.545, "step": 5208 }, { "epoch": 0.840432397547596, "grad_norm": 1.1823288607803581, "learning_rate": 6.801357193920664e-08, "loss": 0.4868, "step": 5209 }, { "epoch": 0.840593739916102, "grad_norm": 1.386773895456666, "learning_rate": 6.787929983439694e-08, "loss": 0.3239, "step": 5210 }, { "epoch": 0.8407550822846079, "grad_norm": 1.9141648245727727, "learning_rate": 6.774515074774073e-08, "loss": 0.5743, "step": 5211 }, { "epoch": 0.8409164246531139, "grad_norm": 2.890271139394418, "learning_rate": 6.761112471742802e-08, "loss": 0.4758, "step": 5212 }, { "epoch": 0.8410777670216198, "grad_norm": 1.764029262827662, "learning_rate": 6.747722178161408e-08, "loss": 0.4483, "step": 5213 }, { "epoch": 0.8412391093901258, "grad_norm": 1.625173852863893, "learning_rate": 6.73434419784189e-08, "loss": 0.6328, "step": 5214 }, { "epoch": 0.8414004517586318, "grad_norm": 1.457770929428687, "learning_rate": 6.720978534592753e-08, "loss": 0.386, "step": 5215 }, { "epoch": 0.8415617941271378, "grad_norm": 1.354309377730857, "learning_rate": 6.70762519221903e-08, "loss": 0.4606, "step": 5216 }, { "epoch": 0.8417231364956438, "grad_norm": 2.293585881070874, "learning_rate": 6.694284174522174e-08, "loss": 0.5985, "step": 5217 }, { "epoch": 0.8418844788641497, "grad_norm": 1.399249986292444, "learning_rate": 6.6809554853002e-08, "loss": 0.4548, "step": 5218 }, { "epoch": 0.8420458212326557, "grad_norm": 1.5001473354459736, "learning_rate": 6.667639128347552e-08, "loss": 0.4421, "step": 5219 }, { "epoch": 0.8422071636011617, "grad_norm": 4.110316411668124, "learning_rate": 6.654335107455206e-08, "loss": 0.4943, "step": 5220 }, { "epoch": 0.8423685059696676, "grad_norm": 1.2907558070068244, "learning_rate": 6.641043426410631e-08, "loss": 0.4431, "step": 5221 }, { "epoch": 0.8425298483381736, "grad_norm": 1.6516845686885768, "learning_rate": 6.627764088997734e-08, "loss": 0.3827, "step": 5222 }, { "epoch": 0.8426911907066795, "grad_norm": 2.024028323604845, "learning_rate": 6.614497098996957e-08, "loss": 0.4812, "step": 5223 }, { "epoch": 0.8428525330751856, "grad_norm": 1.2752097966988094, "learning_rate": 6.601242460185214e-08, "loss": 0.4962, "step": 5224 }, { "epoch": 0.8430138754436916, "grad_norm": 2.0505748826599217, "learning_rate": 6.588000176335883e-08, "loss": 0.6495, "step": 5225 }, { "epoch": 0.8431752178121975, "grad_norm": 1.5467400540145695, "learning_rate": 6.574770251218859e-08, "loss": 0.454, "step": 5226 }, { "epoch": 0.8433365601807035, "grad_norm": 1.1592417493884033, "learning_rate": 6.561552688600464e-08, "loss": 0.3673, "step": 5227 }, { "epoch": 0.8434979025492094, "grad_norm": 1.7951676591657868, "learning_rate": 6.548347492243567e-08, "loss": 0.5057, "step": 5228 }, { "epoch": 0.8436592449177154, "grad_norm": 2.163649757218488, "learning_rate": 6.535154665907489e-08, "loss": 0.5821, "step": 5229 }, { "epoch": 0.8438205872862213, "grad_norm": 1.3778659689571913, "learning_rate": 6.521974213348009e-08, "loss": 0.515, "step": 5230 }, { "epoch": 0.8439819296547273, "grad_norm": 1.9371031077784153, "learning_rate": 6.508806138317414e-08, "loss": 0.4774, "step": 5231 }, { "epoch": 0.8441432720232332, "grad_norm": 1.605143121892002, "learning_rate": 6.495650444564433e-08, "loss": 0.4239, "step": 5232 }, { "epoch": 0.8443046143917393, "grad_norm": 1.7057851612325416, "learning_rate": 6.482507135834302e-08, "loss": 0.5076, "step": 5233 }, { "epoch": 0.8444659567602453, "grad_norm": 1.2701968751582102, "learning_rate": 6.469376215868733e-08, "loss": 0.4068, "step": 5234 }, { "epoch": 0.8446272991287512, "grad_norm": 1.9574193512537363, "learning_rate": 6.456257688405875e-08, "loss": 0.7211, "step": 5235 }, { "epoch": 0.8447886414972572, "grad_norm": 2.1989302505249193, "learning_rate": 6.443151557180388e-08, "loss": 0.6729, "step": 5236 }, { "epoch": 0.8449499838657631, "grad_norm": 1.5376944101128196, "learning_rate": 6.430057825923386e-08, "loss": 0.4212, "step": 5237 }, { "epoch": 0.8451113262342691, "grad_norm": 1.4289197684389492, "learning_rate": 6.41697649836243e-08, "loss": 0.4539, "step": 5238 }, { "epoch": 0.8452726686027751, "grad_norm": 2.2137736000771806, "learning_rate": 6.403907578221618e-08, "loss": 0.5619, "step": 5239 }, { "epoch": 0.845434010971281, "grad_norm": 1.5777176435111844, "learning_rate": 6.390851069221437e-08, "loss": 0.4751, "step": 5240 }, { "epoch": 0.8455953533397871, "grad_norm": 1.7539948099118285, "learning_rate": 6.377806975078887e-08, "loss": 0.5472, "step": 5241 }, { "epoch": 0.845756695708293, "grad_norm": 1.662739832508487, "learning_rate": 6.36477529950743e-08, "loss": 0.4135, "step": 5242 }, { "epoch": 0.845918038076799, "grad_norm": 1.5244144535003101, "learning_rate": 6.351756046216966e-08, "loss": 0.3796, "step": 5243 }, { "epoch": 0.846079380445305, "grad_norm": 1.5189601737069545, "learning_rate": 6.338749218913897e-08, "loss": 0.4834, "step": 5244 }, { "epoch": 0.8462407228138109, "grad_norm": 2.0505766266962873, "learning_rate": 6.32575482130105e-08, "loss": 0.5248, "step": 5245 }, { "epoch": 0.8464020651823169, "grad_norm": 1.625497081729331, "learning_rate": 6.312772857077736e-08, "loss": 0.3498, "step": 5246 }, { "epoch": 0.8465634075508228, "grad_norm": 1.2142409027870955, "learning_rate": 6.299803329939734e-08, "loss": 0.5859, "step": 5247 }, { "epoch": 0.8467247499193288, "grad_norm": 1.8256918862851588, "learning_rate": 6.286846243579252e-08, "loss": 0.4472, "step": 5248 }, { "epoch": 0.8468860922878347, "grad_norm": 1.4666658589331976, "learning_rate": 6.273901601684989e-08, "loss": 0.471, "step": 5249 }, { "epoch": 0.8470474346563408, "grad_norm": 1.1971104880755161, "learning_rate": 6.260969407942074e-08, "loss": 0.4455, "step": 5250 }, { "epoch": 0.8472087770248468, "grad_norm": 1.8139802215869036, "learning_rate": 6.248049666032113e-08, "loss": 0.4135, "step": 5251 }, { "epoch": 0.8473701193933527, "grad_norm": 1.4332210877551685, "learning_rate": 6.235142379633169e-08, "loss": 0.4852, "step": 5252 }, { "epoch": 0.8475314617618587, "grad_norm": 1.9019486097218365, "learning_rate": 6.222247552419723e-08, "loss": 0.395, "step": 5253 }, { "epoch": 0.8476928041303646, "grad_norm": 1.5796897608951195, "learning_rate": 6.209365188062748e-08, "loss": 0.4356, "step": 5254 }, { "epoch": 0.8478541464988706, "grad_norm": 1.415374989779859, "learning_rate": 6.196495290229675e-08, "loss": 0.3755, "step": 5255 }, { "epoch": 0.8480154888673765, "grad_norm": 1.585113837669772, "learning_rate": 6.183637862584335e-08, "loss": 0.472, "step": 5256 }, { "epoch": 0.8481768312358825, "grad_norm": 1.9931015610415699, "learning_rate": 6.170792908787065e-08, "loss": 0.5151, "step": 5257 }, { "epoch": 0.8483381736043886, "grad_norm": 1.1809130233404441, "learning_rate": 6.157960432494608e-08, "loss": 0.279, "step": 5258 }, { "epoch": 0.8484995159728945, "grad_norm": 1.4244395174393236, "learning_rate": 6.145140437360185e-08, "loss": 0.4091, "step": 5259 }, { "epoch": 0.8486608583414005, "grad_norm": 2.061761203573072, "learning_rate": 6.132332927033456e-08, "loss": 0.5347, "step": 5260 }, { "epoch": 0.8488222007099064, "grad_norm": 1.2874710431592518, "learning_rate": 6.119537905160499e-08, "loss": 0.4072, "step": 5261 }, { "epoch": 0.8489835430784124, "grad_norm": 1.6843643726020692, "learning_rate": 6.106755375383904e-08, "loss": 0.416, "step": 5262 }, { "epoch": 0.8491448854469184, "grad_norm": 1.7476460428839375, "learning_rate": 6.093985341342622e-08, "loss": 0.4962, "step": 5263 }, { "epoch": 0.8493062278154243, "grad_norm": 1.825626981499735, "learning_rate": 6.081227806672107e-08, "loss": 0.542, "step": 5264 }, { "epoch": 0.8494675701839303, "grad_norm": 1.9990325018113988, "learning_rate": 6.068482775004235e-08, "loss": 0.6552, "step": 5265 }, { "epoch": 0.8496289125524362, "grad_norm": 1.1646473586624269, "learning_rate": 6.055750249967306e-08, "loss": 0.3181, "step": 5266 }, { "epoch": 0.8497902549209423, "grad_norm": 1.6523015442997346, "learning_rate": 6.043030235186086e-08, "loss": 0.5602, "step": 5267 }, { "epoch": 0.8499515972894482, "grad_norm": 1.3173049439985767, "learning_rate": 6.030322734281779e-08, "loss": 0.2962, "step": 5268 }, { "epoch": 0.8501129396579542, "grad_norm": 1.7398298930184597, "learning_rate": 6.01762775087199e-08, "loss": 0.4998, "step": 5269 }, { "epoch": 0.8502742820264602, "grad_norm": 1.318176030726269, "learning_rate": 6.004945288570812e-08, "loss": 0.2978, "step": 5270 }, { "epoch": 0.8504356243949661, "grad_norm": 2.1522289875541043, "learning_rate": 5.992275350988729e-08, "loss": 0.573, "step": 5271 }, { "epoch": 0.8505969667634721, "grad_norm": 2.0384604782079587, "learning_rate": 5.979617941732684e-08, "loss": 0.5671, "step": 5272 }, { "epoch": 0.850758309131978, "grad_norm": 1.2588394900713118, "learning_rate": 5.96697306440605e-08, "loss": 0.3424, "step": 5273 }, { "epoch": 0.850919651500484, "grad_norm": 2.2751350488507343, "learning_rate": 5.954340722608631e-08, "loss": 0.4134, "step": 5274 }, { "epoch": 0.8510809938689899, "grad_norm": 1.9200230180830833, "learning_rate": 5.941720919936671e-08, "loss": 0.5718, "step": 5275 }, { "epoch": 0.851242336237496, "grad_norm": 1.1784162429514948, "learning_rate": 5.92911365998281e-08, "loss": 0.3334, "step": 5276 }, { "epoch": 0.851403678606002, "grad_norm": 1.3809749225795225, "learning_rate": 5.916518946336163e-08, "loss": 0.3814, "step": 5277 }, { "epoch": 0.8515650209745079, "grad_norm": 1.3773390208924237, "learning_rate": 5.9039367825822526e-08, "loss": 0.4417, "step": 5278 }, { "epoch": 0.8517263633430139, "grad_norm": 1.5138305580886209, "learning_rate": 5.8913671723030135e-08, "loss": 0.3953, "step": 5279 }, { "epoch": 0.8518877057115198, "grad_norm": 1.6340883371948114, "learning_rate": 5.878810119076827e-08, "loss": 0.4033, "step": 5280 }, { "epoch": 0.8520490480800258, "grad_norm": 2.0867536569729874, "learning_rate": 5.8662656264785117e-08, "loss": 0.6119, "step": 5281 }, { "epoch": 0.8522103904485318, "grad_norm": 1.3475832712067568, "learning_rate": 5.853733698079261e-08, "loss": 0.4481, "step": 5282 }, { "epoch": 0.8523717328170377, "grad_norm": 1.179064200749145, "learning_rate": 5.841214337446754e-08, "loss": 0.3179, "step": 5283 }, { "epoch": 0.8525330751855438, "grad_norm": 1.4375228880013813, "learning_rate": 5.828707548145029e-08, "loss": 0.4373, "step": 5284 }, { "epoch": 0.8526944175540497, "grad_norm": 1.3290544287111132, "learning_rate": 5.81621333373461e-08, "loss": 0.398, "step": 5285 }, { "epoch": 0.8528557599225557, "grad_norm": 1.6785351015931342, "learning_rate": 5.8037316977723904e-08, "loss": 0.5545, "step": 5286 }, { "epoch": 0.8530171022910616, "grad_norm": 1.7193232880681377, "learning_rate": 5.791262643811701e-08, "loss": 0.4434, "step": 5287 }, { "epoch": 0.8531784446595676, "grad_norm": 1.7185496820410968, "learning_rate": 5.778806175402306e-08, "loss": 0.5195, "step": 5288 }, { "epoch": 0.8533397870280736, "grad_norm": 1.6425384262676315, "learning_rate": 5.766362296090349e-08, "loss": 0.4537, "step": 5289 }, { "epoch": 0.8535011293965795, "grad_norm": 2.4616237107516983, "learning_rate": 5.7539310094184223e-08, "loss": 0.6226, "step": 5290 }, { "epoch": 0.8536624717650855, "grad_norm": 1.8823593670721326, "learning_rate": 5.74151231892554e-08, "loss": 0.5444, "step": 5291 }, { "epoch": 0.8538238141335914, "grad_norm": 1.1628376193615257, "learning_rate": 5.7291062281470795e-08, "loss": 0.3781, "step": 5292 }, { "epoch": 0.8539851565020975, "grad_norm": 1.7202912789265037, "learning_rate": 5.716712740614887e-08, "loss": 0.5454, "step": 5293 }, { "epoch": 0.8541464988706035, "grad_norm": 1.4739705906370684, "learning_rate": 5.704331859857209e-08, "loss": 0.557, "step": 5294 }, { "epoch": 0.8543078412391094, "grad_norm": 1.48804974695336, "learning_rate": 5.6919635893986626e-08, "loss": 0.4767, "step": 5295 }, { "epoch": 0.8544691836076154, "grad_norm": 1.6358543425869316, "learning_rate": 5.679607932760322e-08, "loss": 0.4883, "step": 5296 }, { "epoch": 0.8546305259761213, "grad_norm": 2.031611953178042, "learning_rate": 5.6672648934596554e-08, "loss": 0.6439, "step": 5297 }, { "epoch": 0.8547918683446273, "grad_norm": 1.5816918279792334, "learning_rate": 5.654934475010542e-08, "loss": 0.4887, "step": 5298 }, { "epoch": 0.8549532107131332, "grad_norm": 1.8852822341904996, "learning_rate": 5.642616680923251e-08, "loss": 0.4667, "step": 5299 }, { "epoch": 0.8551145530816392, "grad_norm": 1.598718117760649, "learning_rate": 5.630311514704472e-08, "loss": 0.4127, "step": 5300 }, { "epoch": 0.8552758954501453, "grad_norm": 1.9717330131858282, "learning_rate": 5.618018979857309e-08, "loss": 0.6, "step": 5301 }, { "epoch": 0.8554372378186512, "grad_norm": 1.5211278377684856, "learning_rate": 5.605739079881239e-08, "loss": 0.5448, "step": 5302 }, { "epoch": 0.8555985801871572, "grad_norm": 1.5259540606492032, "learning_rate": 5.5934718182721776e-08, "loss": 0.4386, "step": 5303 }, { "epoch": 0.8557599225556631, "grad_norm": 1.2440333537233181, "learning_rate": 5.581217198522431e-08, "loss": 0.3739, "step": 5304 }, { "epoch": 0.8559212649241691, "grad_norm": 1.6472838645252728, "learning_rate": 5.568975224120681e-08, "loss": 0.3678, "step": 5305 }, { "epoch": 0.856082607292675, "grad_norm": 2.3740386020893185, "learning_rate": 5.5567458985520456e-08, "loss": 0.4811, "step": 5306 }, { "epoch": 0.856243949661181, "grad_norm": 1.369887340187584, "learning_rate": 5.544529225298017e-08, "loss": 0.3414, "step": 5307 }, { "epoch": 0.856405292029687, "grad_norm": 1.7660889142680745, "learning_rate": 5.532325207836508e-08, "loss": 0.4897, "step": 5308 }, { "epoch": 0.8565666343981929, "grad_norm": 1.3374388440134881, "learning_rate": 5.520133849641817e-08, "loss": 0.3438, "step": 5309 }, { "epoch": 0.856727976766699, "grad_norm": 1.5406767613998973, "learning_rate": 5.507955154184618e-08, "loss": 0.4648, "step": 5310 }, { "epoch": 0.8568893191352049, "grad_norm": 1.5704457999820154, "learning_rate": 5.495789124932021e-08, "loss": 0.5017, "step": 5311 }, { "epoch": 0.8570506615037109, "grad_norm": 1.6876650835457137, "learning_rate": 5.483635765347494e-08, "loss": 0.3796, "step": 5312 }, { "epoch": 0.8572120038722169, "grad_norm": 1.4281755801536422, "learning_rate": 5.4714950788909084e-08, "loss": 0.4824, "step": 5313 }, { "epoch": 0.8573733462407228, "grad_norm": 1.8820654308972866, "learning_rate": 5.459367069018555e-08, "loss": 0.4924, "step": 5314 }, { "epoch": 0.8575346886092288, "grad_norm": 1.7198444696457638, "learning_rate": 5.4472517391830674e-08, "loss": 0.4897, "step": 5315 }, { "epoch": 0.8576960309777347, "grad_norm": 1.8819415978867264, "learning_rate": 5.435149092833507e-08, "loss": 0.5469, "step": 5316 }, { "epoch": 0.8578573733462407, "grad_norm": 1.5221652465020648, "learning_rate": 5.42305913341532e-08, "loss": 0.3732, "step": 5317 }, { "epoch": 0.8580187157147466, "grad_norm": 1.3900503192685407, "learning_rate": 5.410981864370312e-08, "loss": 0.5767, "step": 5318 }, { "epoch": 0.8581800580832527, "grad_norm": 1.3931816687167327, "learning_rate": 5.398917289136712e-08, "loss": 0.4007, "step": 5319 }, { "epoch": 0.8583414004517587, "grad_norm": 1.6850759969037559, "learning_rate": 5.386865411149111e-08, "loss": 0.559, "step": 5320 }, { "epoch": 0.8585027428202646, "grad_norm": 1.3994757777765618, "learning_rate": 5.3748262338385034e-08, "loss": 0.4623, "step": 5321 }, { "epoch": 0.8586640851887706, "grad_norm": 1.4048796864918562, "learning_rate": 5.3627997606322586e-08, "loss": 0.5437, "step": 5322 }, { "epoch": 0.8588254275572765, "grad_norm": 2.3014121364518485, "learning_rate": 5.3507859949541215e-08, "loss": 0.561, "step": 5323 }, { "epoch": 0.8589867699257825, "grad_norm": 1.822745861952347, "learning_rate": 5.338784940224239e-08, "loss": 0.4535, "step": 5324 }, { "epoch": 0.8591481122942884, "grad_norm": 1.6942360972250174, "learning_rate": 5.3267965998591126e-08, "loss": 0.5731, "step": 5325 }, { "epoch": 0.8593094546627944, "grad_norm": 1.5301810445444852, "learning_rate": 5.3148209772716445e-08, "loss": 0.3539, "step": 5326 }, { "epoch": 0.8594707970313005, "grad_norm": 1.2826328258626978, "learning_rate": 5.30285807587113e-08, "loss": 0.4456, "step": 5327 }, { "epoch": 0.8596321393998064, "grad_norm": 1.7774263132222772, "learning_rate": 5.2909078990631964e-08, "loss": 0.5247, "step": 5328 }, { "epoch": 0.8597934817683124, "grad_norm": 1.579387348762304, "learning_rate": 5.278970450249881e-08, "loss": 0.4429, "step": 5329 }, { "epoch": 0.8599548241368183, "grad_norm": 1.9438057698286348, "learning_rate": 5.267045732829606e-08, "loss": 0.4356, "step": 5330 }, { "epoch": 0.8601161665053243, "grad_norm": 1.2222789145576507, "learning_rate": 5.2551337501971494e-08, "loss": 0.4451, "step": 5331 }, { "epoch": 0.8602775088738303, "grad_norm": 1.1897961853098284, "learning_rate": 5.2432345057436824e-08, "loss": 0.5228, "step": 5332 }, { "epoch": 0.8604388512423362, "grad_norm": 1.5541242796544066, "learning_rate": 5.2313480028567166e-08, "loss": 0.4507, "step": 5333 }, { "epoch": 0.8606001936108422, "grad_norm": 1.6979031084451564, "learning_rate": 5.219474244920163e-08, "loss": 0.368, "step": 5334 }, { "epoch": 0.8607615359793481, "grad_norm": 1.5951334614657469, "learning_rate": 5.2076132353143175e-08, "loss": 0.5609, "step": 5335 }, { "epoch": 0.8609228783478542, "grad_norm": 1.194732974317257, "learning_rate": 5.195764977415801e-08, "loss": 0.2444, "step": 5336 }, { "epoch": 0.8610842207163601, "grad_norm": 1.2058150203889861, "learning_rate": 5.183929474597659e-08, "loss": 0.2531, "step": 5337 }, { "epoch": 0.8612455630848661, "grad_norm": 1.4623435694835742, "learning_rate": 5.1721067302292475e-08, "loss": 0.4721, "step": 5338 }, { "epoch": 0.8614069054533721, "grad_norm": 1.6452613351660677, "learning_rate": 5.16029674767634e-08, "loss": 0.3858, "step": 5339 }, { "epoch": 0.861568247821878, "grad_norm": 1.5385580252688362, "learning_rate": 5.148499530301065e-08, "loss": 0.4725, "step": 5340 }, { "epoch": 0.861729590190384, "grad_norm": 1.5702195780576134, "learning_rate": 5.1367150814618896e-08, "loss": 0.4664, "step": 5341 }, { "epoch": 0.8618909325588899, "grad_norm": 1.8446025493693134, "learning_rate": 5.124943404513676e-08, "loss": 0.5916, "step": 5342 }, { "epoch": 0.8620522749273959, "grad_norm": 1.8389354496631887, "learning_rate": 5.113184502807644e-08, "loss": 0.4053, "step": 5343 }, { "epoch": 0.862213617295902, "grad_norm": 2.103852453156666, "learning_rate": 5.101438379691364e-08, "loss": 0.625, "step": 5344 }, { "epoch": 0.8623749596644079, "grad_norm": 2.0605364613925246, "learning_rate": 5.089705038508796e-08, "loss": 0.4562, "step": 5345 }, { "epoch": 0.8625363020329139, "grad_norm": 1.5740331346303307, "learning_rate": 5.077984482600217e-08, "loss": 0.4886, "step": 5346 }, { "epoch": 0.8626976444014198, "grad_norm": 1.5897290237609796, "learning_rate": 5.066276715302304e-08, "loss": 0.391, "step": 5347 }, { "epoch": 0.8628589867699258, "grad_norm": 1.5457871590407581, "learning_rate": 5.054581739948088e-08, "loss": 0.4243, "step": 5348 }, { "epoch": 0.8630203291384317, "grad_norm": 1.1683352606608284, "learning_rate": 5.0428995598669364e-08, "loss": 0.3401, "step": 5349 }, { "epoch": 0.8631816715069377, "grad_norm": 2.322862142655685, "learning_rate": 5.031230178384593e-08, "loss": 0.6486, "step": 5350 }, { "epoch": 0.8633430138754437, "grad_norm": 1.4733604953097543, "learning_rate": 5.01957359882314e-08, "loss": 0.3595, "step": 5351 }, { "epoch": 0.8635043562439496, "grad_norm": 1.6915853991351246, "learning_rate": 5.007929824501039e-08, "loss": 0.4165, "step": 5352 }, { "epoch": 0.8636656986124557, "grad_norm": 1.178009152977557, "learning_rate": 4.996298858733094e-08, "loss": 0.4653, "step": 5353 }, { "epoch": 0.8638270409809616, "grad_norm": 1.4983962545595544, "learning_rate": 4.9846807048304626e-08, "loss": 0.4881, "step": 5354 }, { "epoch": 0.8639883833494676, "grad_norm": 1.4960338291688386, "learning_rate": 4.97307536610066e-08, "loss": 0.359, "step": 5355 }, { "epoch": 0.8641497257179735, "grad_norm": 1.6911877497616423, "learning_rate": 4.9614828458475366e-08, "loss": 0.4724, "step": 5356 }, { "epoch": 0.8643110680864795, "grad_norm": 1.7334872886559165, "learning_rate": 4.9499031473713094e-08, "loss": 0.4872, "step": 5357 }, { "epoch": 0.8644724104549855, "grad_norm": 1.773481528117945, "learning_rate": 4.938336273968557e-08, "loss": 0.6456, "step": 5358 }, { "epoch": 0.8646337528234914, "grad_norm": 1.2263653347671495, "learning_rate": 4.926782228932169e-08, "loss": 0.322, "step": 5359 }, { "epoch": 0.8647950951919974, "grad_norm": 1.4354343499957019, "learning_rate": 4.915241015551413e-08, "loss": 0.5292, "step": 5360 }, { "epoch": 0.8649564375605033, "grad_norm": 1.2090239140911814, "learning_rate": 4.9037126371119067e-08, "loss": 0.2319, "step": 5361 }, { "epoch": 0.8651177799290094, "grad_norm": 1.7019858093835616, "learning_rate": 4.892197096895584e-08, "loss": 0.5359, "step": 5362 }, { "epoch": 0.8652791222975154, "grad_norm": 1.6876669907066963, "learning_rate": 4.880694398180762e-08, "loss": 0.3521, "step": 5363 }, { "epoch": 0.8654404646660213, "grad_norm": 1.7161461266231335, "learning_rate": 4.869204544242067e-08, "loss": 0.4903, "step": 5364 }, { "epoch": 0.8656018070345273, "grad_norm": 1.4634340547614362, "learning_rate": 4.8577275383504924e-08, "loss": 0.439, "step": 5365 }, { "epoch": 0.8657631494030332, "grad_norm": 1.4216446742376267, "learning_rate": 4.846263383773364e-08, "loss": 0.326, "step": 5366 }, { "epoch": 0.8659244917715392, "grad_norm": 1.5578033814311627, "learning_rate": 4.834812083774353e-08, "loss": 0.3765, "step": 5367 }, { "epoch": 0.8660858341400451, "grad_norm": 1.6573023062251981, "learning_rate": 4.8233736416134815e-08, "loss": 0.484, "step": 5368 }, { "epoch": 0.8662471765085511, "grad_norm": 1.4189238891244491, "learning_rate": 4.811948060547072e-08, "loss": 0.5026, "step": 5369 }, { "epoch": 0.8664085188770572, "grad_norm": 1.8544250122567303, "learning_rate": 4.800535343827833e-08, "loss": 0.4767, "step": 5370 }, { "epoch": 0.8665698612455631, "grad_norm": 1.5777384218417632, "learning_rate": 4.789135494704788e-08, "loss": 0.4551, "step": 5371 }, { "epoch": 0.8667312036140691, "grad_norm": 2.3214493865871995, "learning_rate": 4.777748516423291e-08, "loss": 0.5795, "step": 5372 }, { "epoch": 0.866892545982575, "grad_norm": 1.6022410374255485, "learning_rate": 4.766374412225049e-08, "loss": 0.4838, "step": 5373 }, { "epoch": 0.867053888351081, "grad_norm": 1.7113906747915, "learning_rate": 4.7550131853480944e-08, "loss": 0.4949, "step": 5374 }, { "epoch": 0.867215230719587, "grad_norm": 1.6628873015413772, "learning_rate": 4.74366483902679e-08, "loss": 0.5359, "step": 5375 }, { "epoch": 0.8673765730880929, "grad_norm": 1.6388373672160004, "learning_rate": 4.7323293764918335e-08, "loss": 0.4088, "step": 5376 }, { "epoch": 0.8675379154565989, "grad_norm": 1.9490447492282004, "learning_rate": 4.7210068009702675e-08, "loss": 0.351, "step": 5377 }, { "epoch": 0.8676992578251048, "grad_norm": 1.8820252731564044, "learning_rate": 4.709697115685463e-08, "loss": 0.4018, "step": 5378 }, { "epoch": 0.8678606001936109, "grad_norm": 1.8167837099407445, "learning_rate": 4.698400323857088e-08, "loss": 0.5298, "step": 5379 }, { "epoch": 0.8680219425621168, "grad_norm": 1.4112882007196184, "learning_rate": 4.6871164287011876e-08, "loss": 0.3569, "step": 5380 }, { "epoch": 0.8681832849306228, "grad_norm": 1.315851609745542, "learning_rate": 4.675845433430115e-08, "loss": 0.4547, "step": 5381 }, { "epoch": 0.8683446272991288, "grad_norm": 1.43787379173368, "learning_rate": 4.66458734125253e-08, "loss": 0.3803, "step": 5382 }, { "epoch": 0.8685059696676347, "grad_norm": 1.595688930714485, "learning_rate": 4.6533421553734577e-08, "loss": 0.3499, "step": 5383 }, { "epoch": 0.8686673120361407, "grad_norm": 2.1093407522176366, "learning_rate": 4.6421098789942325e-08, "loss": 0.4979, "step": 5384 }, { "epoch": 0.8688286544046466, "grad_norm": 1.739636662135964, "learning_rate": 4.630890515312497e-08, "loss": 0.7143, "step": 5385 }, { "epoch": 0.8689899967731526, "grad_norm": 1.241550210104124, "learning_rate": 4.6196840675222394e-08, "loss": 0.3587, "step": 5386 }, { "epoch": 0.8691513391416587, "grad_norm": 1.6035903374275917, "learning_rate": 4.60849053881377e-08, "loss": 0.4295, "step": 5387 }, { "epoch": 0.8693126815101646, "grad_norm": 2.038175309492322, "learning_rate": 4.5973099323736945e-08, "loss": 0.4739, "step": 5388 }, { "epoch": 0.8694740238786706, "grad_norm": 1.689069794842885, "learning_rate": 4.586142251384995e-08, "loss": 0.4421, "step": 5389 }, { "epoch": 0.8696353662471765, "grad_norm": 2.0395825186998287, "learning_rate": 4.574987499026911e-08, "loss": 0.5058, "step": 5390 }, { "epoch": 0.8697967086156825, "grad_norm": 2.1197999271416204, "learning_rate": 4.563845678475048e-08, "loss": 0.5174, "step": 5391 }, { "epoch": 0.8699580509841884, "grad_norm": 1.9007011700877712, "learning_rate": 4.5527167929012957e-08, "loss": 0.4974, "step": 5392 }, { "epoch": 0.8701193933526944, "grad_norm": 1.65615470180212, "learning_rate": 4.5416008454738807e-08, "loss": 0.5695, "step": 5393 }, { "epoch": 0.8702807357212003, "grad_norm": 1.363797852317371, "learning_rate": 4.5304978393573555e-08, "loss": 0.4809, "step": 5394 }, { "epoch": 0.8704420780897063, "grad_norm": 1.6263749834453103, "learning_rate": 4.519407777712564e-08, "loss": 0.5042, "step": 5395 }, { "epoch": 0.8706034204582124, "grad_norm": 1.5837932052680435, "learning_rate": 4.5083306636966803e-08, "loss": 0.5188, "step": 5396 }, { "epoch": 0.8707647628267183, "grad_norm": 1.7953361888149335, "learning_rate": 4.4972665004631984e-08, "loss": 0.4168, "step": 5397 }, { "epoch": 0.8709261051952243, "grad_norm": 1.6707196388026069, "learning_rate": 4.4862152911618935e-08, "loss": 0.4329, "step": 5398 }, { "epoch": 0.8710874475637302, "grad_norm": 1.4210912200083134, "learning_rate": 4.475177038938899e-08, "loss": 0.5394, "step": 5399 }, { "epoch": 0.8712487899322362, "grad_norm": 2.0709993194844203, "learning_rate": 4.464151746936623e-08, "loss": 0.5864, "step": 5400 }, { "epoch": 0.8714101323007422, "grad_norm": 1.4655968116375753, "learning_rate": 4.4531394182938e-08, "loss": 0.4462, "step": 5401 }, { "epoch": 0.8715714746692481, "grad_norm": 1.2886365244475253, "learning_rate": 4.442140056145488e-08, "loss": 0.4344, "step": 5402 }, { "epoch": 0.8717328170377541, "grad_norm": 1.9388036956336339, "learning_rate": 4.431153663623011e-08, "loss": 0.4629, "step": 5403 }, { "epoch": 0.8718941594062601, "grad_norm": 1.807633806396457, "learning_rate": 4.420180243854049e-08, "loss": 0.5541, "step": 5404 }, { "epoch": 0.8720555017747661, "grad_norm": 1.6767612250620485, "learning_rate": 4.409219799962549e-08, "loss": 0.4678, "step": 5405 }, { "epoch": 0.872216844143272, "grad_norm": 1.5552566483341261, "learning_rate": 4.3982723350687865e-08, "loss": 0.411, "step": 5406 }, { "epoch": 0.872378186511778, "grad_norm": 1.4747183450010106, "learning_rate": 4.387337852289352e-08, "loss": 0.4765, "step": 5407 }, { "epoch": 0.872539528880284, "grad_norm": 1.7754249359609129, "learning_rate": 4.3764163547371004e-08, "loss": 0.4879, "step": 5408 }, { "epoch": 0.8727008712487899, "grad_norm": 1.2674582120488869, "learning_rate": 4.365507845521232e-08, "loss": 0.3595, "step": 5409 }, { "epoch": 0.8728622136172959, "grad_norm": 1.4535007606244532, "learning_rate": 4.354612327747237e-08, "loss": 0.2756, "step": 5410 }, { "epoch": 0.8730235559858018, "grad_norm": 1.6083089760762594, "learning_rate": 4.343729804516877e-08, "loss": 0.4873, "step": 5411 }, { "epoch": 0.8731848983543078, "grad_norm": 1.6805717092671213, "learning_rate": 4.332860278928274e-08, "loss": 0.4725, "step": 5412 }, { "epoch": 0.8733462407228139, "grad_norm": 2.042360291763148, "learning_rate": 4.322003754075787e-08, "loss": 0.6033, "step": 5413 }, { "epoch": 0.8735075830913198, "grad_norm": 1.410405356259921, "learning_rate": 4.3111602330501206e-08, "loss": 0.3874, "step": 5414 }, { "epoch": 0.8736689254598258, "grad_norm": 1.695755720432311, "learning_rate": 4.300329718938256e-08, "loss": 0.6123, "step": 5415 }, { "epoch": 0.8738302678283317, "grad_norm": 1.3944138763356653, "learning_rate": 4.289512214823465e-08, "loss": 0.4979, "step": 5416 }, { "epoch": 0.8739916101968377, "grad_norm": 1.8317375028945873, "learning_rate": 4.278707723785346e-08, "loss": 0.4819, "step": 5417 }, { "epoch": 0.8741529525653436, "grad_norm": 1.6740620648809184, "learning_rate": 4.267916248899745e-08, "loss": 0.4256, "step": 5418 }, { "epoch": 0.8743142949338496, "grad_norm": 1.581935474652024, "learning_rate": 4.2571377932388466e-08, "loss": 0.4637, "step": 5419 }, { "epoch": 0.8744756373023556, "grad_norm": 2.0783670614279877, "learning_rate": 4.24637235987112e-08, "loss": 0.5792, "step": 5420 }, { "epoch": 0.8746369796708615, "grad_norm": 1.8124142988266867, "learning_rate": 4.235619951861297e-08, "loss": 0.3353, "step": 5421 }, { "epoch": 0.8747983220393676, "grad_norm": 1.5596533211609085, "learning_rate": 4.224880572270434e-08, "loss": 0.3416, "step": 5422 }, { "epoch": 0.8749596644078735, "grad_norm": 2.0050492921320897, "learning_rate": 4.214154224155875e-08, "loss": 0.6039, "step": 5423 }, { "epoch": 0.8751210067763795, "grad_norm": 1.8797207215021934, "learning_rate": 4.203440910571238e-08, "loss": 0.5345, "step": 5424 }, { "epoch": 0.8752823491448855, "grad_norm": 1.8531504071248512, "learning_rate": 4.192740634566455e-08, "loss": 0.4611, "step": 5425 }, { "epoch": 0.8754436915133914, "grad_norm": 1.2943516737357081, "learning_rate": 4.182053399187707e-08, "loss": 0.3844, "step": 5426 }, { "epoch": 0.8756050338818974, "grad_norm": 1.5109101727922782, "learning_rate": 4.171379207477499e-08, "loss": 0.4599, "step": 5427 }, { "epoch": 0.8757663762504033, "grad_norm": 1.4943005046201083, "learning_rate": 4.160718062474622e-08, "loss": 0.3624, "step": 5428 }, { "epoch": 0.8759277186189093, "grad_norm": 1.3594803604862375, "learning_rate": 4.1500699672141206e-08, "loss": 0.508, "step": 5429 }, { "epoch": 0.8760890609874153, "grad_norm": 1.906553775784275, "learning_rate": 4.139434924727358e-08, "loss": 0.4828, "step": 5430 }, { "epoch": 0.8762504033559213, "grad_norm": 1.441433045673191, "learning_rate": 4.1288129380419534e-08, "loss": 0.352, "step": 5431 }, { "epoch": 0.8764117457244273, "grad_norm": 1.5679049278845072, "learning_rate": 4.118204010181836e-08, "loss": 0.3797, "step": 5432 }, { "epoch": 0.8765730880929332, "grad_norm": 1.1083107262411493, "learning_rate": 4.107608144167213e-08, "loss": 0.4648, "step": 5433 }, { "epoch": 0.8767344304614392, "grad_norm": 1.4585651576749492, "learning_rate": 4.097025343014532e-08, "loss": 0.612, "step": 5434 }, { "epoch": 0.8768957728299451, "grad_norm": 1.0690976230055766, "learning_rate": 4.0864556097366e-08, "loss": 0.4239, "step": 5435 }, { "epoch": 0.8770571151984511, "grad_norm": 1.438923255570551, "learning_rate": 4.0758989473424265e-08, "loss": 0.4734, "step": 5436 }, { "epoch": 0.877218457566957, "grad_norm": 1.453469922547461, "learning_rate": 4.065355358837341e-08, "loss": 0.5091, "step": 5437 }, { "epoch": 0.877379799935463, "grad_norm": 1.581969912246492, "learning_rate": 4.0548248472229484e-08, "loss": 0.3315, "step": 5438 }, { "epoch": 0.8775411423039691, "grad_norm": 1.417431017426671, "learning_rate": 4.0443074154971114e-08, "loss": 0.4187, "step": 5439 }, { "epoch": 0.877702484672475, "grad_norm": 1.9255563476963447, "learning_rate": 4.0338030666539904e-08, "loss": 0.5202, "step": 5440 }, { "epoch": 0.877863827040981, "grad_norm": 1.6331862610496237, "learning_rate": 4.023311803684021e-08, "loss": 0.534, "step": 5441 }, { "epoch": 0.8780251694094869, "grad_norm": 1.612255000937915, "learning_rate": 4.012833629573886e-08, "loss": 0.5411, "step": 5442 }, { "epoch": 0.8781865117779929, "grad_norm": 1.1960113576745257, "learning_rate": 4.002368547306573e-08, "loss": 0.314, "step": 5443 }, { "epoch": 0.8783478541464989, "grad_norm": 1.5521877262245107, "learning_rate": 3.991916559861325e-08, "loss": 0.4744, "step": 5444 }, { "epoch": 0.8785091965150048, "grad_norm": 1.287219168765916, "learning_rate": 3.9814776702136676e-08, "loss": 0.3748, "step": 5445 }, { "epoch": 0.8786705388835108, "grad_norm": 1.6481730172110196, "learning_rate": 3.971051881335391e-08, "loss": 0.4538, "step": 5446 }, { "epoch": 0.8788318812520168, "grad_norm": 1.286145186310422, "learning_rate": 3.960639196194559e-08, "loss": 0.328, "step": 5447 }, { "epoch": 0.8789932236205228, "grad_norm": 2.2536448520499284, "learning_rate": 3.950239617755507e-08, "loss": 0.641, "step": 5448 }, { "epoch": 0.8791545659890287, "grad_norm": 1.8117394660400283, "learning_rate": 3.939853148978833e-08, "loss": 0.5242, "step": 5449 }, { "epoch": 0.8793159083575347, "grad_norm": 1.7366009423869853, "learning_rate": 3.9294797928213995e-08, "loss": 0.5758, "step": 5450 }, { "epoch": 0.8794772507260407, "grad_norm": 1.3704800441364853, "learning_rate": 3.9191195522363554e-08, "loss": 0.624, "step": 5451 }, { "epoch": 0.8796385930945466, "grad_norm": 1.4592975244643693, "learning_rate": 3.908772430173091e-08, "loss": 0.4415, "step": 5452 }, { "epoch": 0.8797999354630526, "grad_norm": 1.4628828835378025, "learning_rate": 3.898438429577272e-08, "loss": 0.5709, "step": 5453 }, { "epoch": 0.8799612778315585, "grad_norm": 1.890591770857425, "learning_rate": 3.888117553390852e-08, "loss": 0.5119, "step": 5454 }, { "epoch": 0.8801226202000645, "grad_norm": 1.8992425914728872, "learning_rate": 3.877809804551996e-08, "loss": 0.6108, "step": 5455 }, { "epoch": 0.8802839625685706, "grad_norm": 1.5950366044104378, "learning_rate": 3.8675151859951906e-08, "loss": 0.4638, "step": 5456 }, { "epoch": 0.8804453049370765, "grad_norm": 1.8145602782836343, "learning_rate": 3.857233700651125e-08, "loss": 0.4626, "step": 5457 }, { "epoch": 0.8806066473055825, "grad_norm": 1.5196304877269098, "learning_rate": 3.8469653514468124e-08, "loss": 0.4518, "step": 5458 }, { "epoch": 0.8807679896740884, "grad_norm": 1.7366377358153207, "learning_rate": 3.8367101413054715e-08, "loss": 0.5811, "step": 5459 }, { "epoch": 0.8809293320425944, "grad_norm": 1.6141962961613106, "learning_rate": 3.8264680731466217e-08, "loss": 0.4814, "step": 5460 }, { "epoch": 0.8810906744111003, "grad_norm": 1.6557353677680509, "learning_rate": 3.816239149886014e-08, "loss": 0.4695, "step": 5461 }, { "epoch": 0.8812520167796063, "grad_norm": 1.1562064910125782, "learning_rate": 3.806023374435663e-08, "loss": 0.479, "step": 5462 }, { "epoch": 0.8814133591481123, "grad_norm": 1.5291121214130423, "learning_rate": 3.795820749703849e-08, "loss": 0.4096, "step": 5463 }, { "epoch": 0.8815747015166182, "grad_norm": 1.377590080818991, "learning_rate": 3.7856312785951084e-08, "loss": 0.425, "step": 5464 }, { "epoch": 0.8817360438851243, "grad_norm": 2.164887419101518, "learning_rate": 3.7754549640102164e-08, "loss": 0.4949, "step": 5465 }, { "epoch": 0.8818973862536302, "grad_norm": 1.660379551848031, "learning_rate": 3.765291808846216e-08, "loss": 0.4502, "step": 5466 }, { "epoch": 0.8820587286221362, "grad_norm": 1.4709207442175982, "learning_rate": 3.75514181599641e-08, "loss": 0.3445, "step": 5467 }, { "epoch": 0.8822200709906421, "grad_norm": 1.283453186845826, "learning_rate": 3.745004988350331e-08, "loss": 0.3031, "step": 5468 }, { "epoch": 0.8823814133591481, "grad_norm": 1.4686678599662857, "learning_rate": 3.734881328793804e-08, "loss": 0.3472, "step": 5469 }, { "epoch": 0.8825427557276541, "grad_norm": 1.6223728510992808, "learning_rate": 3.7247708402088515e-08, "loss": 0.565, "step": 5470 }, { "epoch": 0.88270409809616, "grad_norm": 1.7965395406973548, "learning_rate": 3.714673525473805e-08, "loss": 0.5423, "step": 5471 }, { "epoch": 0.882865440464666, "grad_norm": 1.3663612266699943, "learning_rate": 3.7045893874631863e-08, "loss": 0.3205, "step": 5472 }, { "epoch": 0.883026782833172, "grad_norm": 1.4974553458508815, "learning_rate": 3.694518429047805e-08, "loss": 0.6702, "step": 5473 }, { "epoch": 0.883188125201678, "grad_norm": 1.2517980994355633, "learning_rate": 3.6844606530947234e-08, "loss": 0.4164, "step": 5474 }, { "epoch": 0.883349467570184, "grad_norm": 1.8543956986606995, "learning_rate": 3.6744160624672173e-08, "loss": 0.4262, "step": 5475 }, { "epoch": 0.8835108099386899, "grad_norm": 1.597447836416774, "learning_rate": 3.664384660024833e-08, "loss": 0.3364, "step": 5476 }, { "epoch": 0.8836721523071959, "grad_norm": 2.058590738775668, "learning_rate": 3.6543664486233696e-08, "loss": 0.606, "step": 5477 }, { "epoch": 0.8838334946757018, "grad_norm": 1.9928130719712112, "learning_rate": 3.644361431114845e-08, "loss": 0.3425, "step": 5478 }, { "epoch": 0.8839948370442078, "grad_norm": 1.4137368854754266, "learning_rate": 3.634369610347532e-08, "loss": 0.4986, "step": 5479 }, { "epoch": 0.8841561794127137, "grad_norm": 1.6992869396339305, "learning_rate": 3.62439098916596e-08, "loss": 0.4641, "step": 5480 }, { "epoch": 0.8843175217812197, "grad_norm": 1.4147505798500437, "learning_rate": 3.614425570410884e-08, "loss": 0.4115, "step": 5481 }, { "epoch": 0.8844788641497258, "grad_norm": 1.6897069839530974, "learning_rate": 3.6044733569193184e-08, "loss": 0.4592, "step": 5482 }, { "epoch": 0.8846402065182317, "grad_norm": 1.4797843216635853, "learning_rate": 3.594534351524486e-08, "loss": 0.445, "step": 5483 }, { "epoch": 0.8848015488867377, "grad_norm": 1.9322395283398874, "learning_rate": 3.5846085570558845e-08, "loss": 0.5993, "step": 5484 }, { "epoch": 0.8849628912552436, "grad_norm": 1.5348645581063356, "learning_rate": 3.574695976339226e-08, "loss": 0.3867, "step": 5485 }, { "epoch": 0.8851242336237496, "grad_norm": 2.1176642412535758, "learning_rate": 3.564796612196474e-08, "loss": 0.6233, "step": 5486 }, { "epoch": 0.8852855759922555, "grad_norm": 2.0447696020639836, "learning_rate": 3.554910467445832e-08, "loss": 0.5456, "step": 5487 }, { "epoch": 0.8854469183607615, "grad_norm": 1.3942593007494841, "learning_rate": 3.545037544901725e-08, "loss": 0.3477, "step": 5488 }, { "epoch": 0.8856082607292675, "grad_norm": 1.4274253249803948, "learning_rate": 3.535177847374826e-08, "loss": 0.4473, "step": 5489 }, { "epoch": 0.8857696030977735, "grad_norm": 1.509914770815745, "learning_rate": 3.525331377672047e-08, "loss": 0.4972, "step": 5490 }, { "epoch": 0.8859309454662795, "grad_norm": 1.7691155423243292, "learning_rate": 3.5154981385965124e-08, "loss": 0.4676, "step": 5491 }, { "epoch": 0.8860922878347854, "grad_norm": 1.6224949675307088, "learning_rate": 3.505678132947615e-08, "loss": 0.3504, "step": 5492 }, { "epoch": 0.8862536302032914, "grad_norm": 1.5426094506010717, "learning_rate": 3.4958713635209416e-08, "loss": 0.4888, "step": 5493 }, { "epoch": 0.8864149725717974, "grad_norm": 1.876576777903342, "learning_rate": 3.486077833108342e-08, "loss": 0.4943, "step": 5494 }, { "epoch": 0.8865763149403033, "grad_norm": 1.5111385681113731, "learning_rate": 3.4762975444978807e-08, "loss": 0.418, "step": 5495 }, { "epoch": 0.8867376573088093, "grad_norm": 1.5356958426195007, "learning_rate": 3.466530500473852e-08, "loss": 0.5351, "step": 5496 }, { "epoch": 0.8868989996773152, "grad_norm": 1.8039948426403412, "learning_rate": 3.456776703816794e-08, "loss": 0.4467, "step": 5497 }, { "epoch": 0.8870603420458212, "grad_norm": 2.204990435901755, "learning_rate": 3.4470361573034454e-08, "loss": 0.391, "step": 5498 }, { "epoch": 0.8872216844143272, "grad_norm": 1.826368613023786, "learning_rate": 3.437308863706806e-08, "loss": 0.4428, "step": 5499 }, { "epoch": 0.8873830267828332, "grad_norm": 1.3504064389627852, "learning_rate": 3.427594825796093e-08, "loss": 0.3831, "step": 5500 }, { "epoch": 0.8875443691513392, "grad_norm": 1.4696736779314286, "learning_rate": 3.417894046336728e-08, "loss": 0.5577, "step": 5501 }, { "epoch": 0.8877057115198451, "grad_norm": 1.5604600941922244, "learning_rate": 3.408206528090374e-08, "loss": 0.4814, "step": 5502 }, { "epoch": 0.8878670538883511, "grad_norm": 1.152825878600296, "learning_rate": 3.3985322738149356e-08, "loss": 0.3896, "step": 5503 }, { "epoch": 0.888028396256857, "grad_norm": 1.2668082275258352, "learning_rate": 3.388871286264511e-08, "loss": 0.4885, "step": 5504 }, { "epoch": 0.888189738625363, "grad_norm": 1.6824538051390896, "learning_rate": 3.3792235681894485e-08, "loss": 0.4233, "step": 5505 }, { "epoch": 0.888351080993869, "grad_norm": 1.8254314696405352, "learning_rate": 3.3695891223362914e-08, "loss": 0.4873, "step": 5506 }, { "epoch": 0.888512423362375, "grad_norm": 2.2082968534899345, "learning_rate": 3.359967951447823e-08, "loss": 0.4493, "step": 5507 }, { "epoch": 0.888673765730881, "grad_norm": 1.6749621970622275, "learning_rate": 3.350360058263058e-08, "loss": 0.4105, "step": 5508 }, { "epoch": 0.8888351080993869, "grad_norm": 1.45754046912306, "learning_rate": 3.340765445517196e-08, "loss": 0.478, "step": 5509 }, { "epoch": 0.8889964504678929, "grad_norm": 1.5442413104249602, "learning_rate": 3.3311841159416934e-08, "loss": 0.4073, "step": 5510 }, { "epoch": 0.8891577928363988, "grad_norm": 2.209068343963858, "learning_rate": 3.321616072264194e-08, "loss": 0.5001, "step": 5511 }, { "epoch": 0.8893191352049048, "grad_norm": 1.3866663963213681, "learning_rate": 3.3120613172085756e-08, "loss": 0.4081, "step": 5512 }, { "epoch": 0.8894804775734108, "grad_norm": 1.4051715106065437, "learning_rate": 3.302519853494945e-08, "loss": 0.4765, "step": 5513 }, { "epoch": 0.8896418199419167, "grad_norm": 1.5571706293161747, "learning_rate": 3.2929916838395856e-08, "loss": 0.335, "step": 5514 }, { "epoch": 0.8898031623104227, "grad_norm": 1.363649640957899, "learning_rate": 3.283476810955049e-08, "loss": 0.3787, "step": 5515 }, { "epoch": 0.8899645046789287, "grad_norm": 1.6642652934264075, "learning_rate": 3.2739752375500586e-08, "loss": 0.4599, "step": 5516 }, { "epoch": 0.8901258470474347, "grad_norm": 2.341988066567136, "learning_rate": 3.264486966329566e-08, "loss": 0.4924, "step": 5517 }, { "epoch": 0.8902871894159406, "grad_norm": 1.2704150600721378, "learning_rate": 3.25501199999475e-08, "loss": 0.4263, "step": 5518 }, { "epoch": 0.8904485317844466, "grad_norm": 1.8189176528058104, "learning_rate": 3.245550341242975e-08, "loss": 0.3871, "step": 5519 }, { "epoch": 0.8906098741529526, "grad_norm": 2.1544033380453524, "learning_rate": 3.236101992767831e-08, "loss": 0.5396, "step": 5520 }, { "epoch": 0.8907712165214585, "grad_norm": 1.5759210587883883, "learning_rate": 3.226666957259133e-08, "loss": 0.4973, "step": 5521 }, { "epoch": 0.8909325588899645, "grad_norm": 1.1240923186804734, "learning_rate": 3.2172452374028716e-08, "loss": 0.3576, "step": 5522 }, { "epoch": 0.8910939012584704, "grad_norm": 1.6618604821083531, "learning_rate": 3.207836835881278e-08, "loss": 0.4738, "step": 5523 }, { "epoch": 0.8912552436269764, "grad_norm": 1.2389547640322125, "learning_rate": 3.198441755372777e-08, "loss": 0.4773, "step": 5524 }, { "epoch": 0.8914165859954825, "grad_norm": 1.6606403677175092, "learning_rate": 3.1890599985520014e-08, "loss": 0.5278, "step": 5525 }, { "epoch": 0.8915779283639884, "grad_norm": 1.6376554954024074, "learning_rate": 3.179691568089798e-08, "loss": 0.4905, "step": 5526 }, { "epoch": 0.8917392707324944, "grad_norm": 1.6090174898248484, "learning_rate": 3.170336466653217e-08, "loss": 0.5787, "step": 5527 }, { "epoch": 0.8919006131010003, "grad_norm": 1.2959869285560086, "learning_rate": 3.160994696905517e-08, "loss": 0.3103, "step": 5528 }, { "epoch": 0.8920619554695063, "grad_norm": 1.9460033737263147, "learning_rate": 3.1516662615061416e-08, "loss": 0.4652, "step": 5529 }, { "epoch": 0.8922232978380122, "grad_norm": 1.518579964872016, "learning_rate": 3.142351163110768e-08, "loss": 0.4499, "step": 5530 }, { "epoch": 0.8923846402065182, "grad_norm": 1.5147948203563149, "learning_rate": 3.133049404371257e-08, "loss": 0.4255, "step": 5531 }, { "epoch": 0.8925459825750242, "grad_norm": 1.3297601843578573, "learning_rate": 3.123760987935675e-08, "loss": 0.4521, "step": 5532 }, { "epoch": 0.8927073249435302, "grad_norm": 1.264213104141145, "learning_rate": 3.1144859164482896e-08, "loss": 0.517, "step": 5533 }, { "epoch": 0.8928686673120362, "grad_norm": 1.691404417534732, "learning_rate": 3.105224192549588e-08, "loss": 0.3883, "step": 5534 }, { "epoch": 0.8930300096805421, "grad_norm": 1.5826380943321268, "learning_rate": 3.0959758188762175e-08, "loss": 0.384, "step": 5535 }, { "epoch": 0.8931913520490481, "grad_norm": 1.701854687029107, "learning_rate": 3.086740798061071e-08, "loss": 0.5146, "step": 5536 }, { "epoch": 0.893352694417554, "grad_norm": 1.712256215139241, "learning_rate": 3.077519132733192e-08, "loss": 0.6016, "step": 5537 }, { "epoch": 0.89351403678606, "grad_norm": 1.3318010058495315, "learning_rate": 3.068310825517878e-08, "loss": 0.5462, "step": 5538 }, { "epoch": 0.893675379154566, "grad_norm": 1.5670740696941663, "learning_rate": 3.0591158790365665e-08, "loss": 0.486, "step": 5539 }, { "epoch": 0.8938367215230719, "grad_norm": 1.1431990020642153, "learning_rate": 3.0499342959069364e-08, "loss": 0.5026, "step": 5540 }, { "epoch": 0.8939980638915779, "grad_norm": 1.5158533140644355, "learning_rate": 3.0407660787428404e-08, "loss": 0.4646, "step": 5541 }, { "epoch": 0.8941594062600839, "grad_norm": 1.3160733670854277, "learning_rate": 3.031611230154324e-08, "loss": 0.35, "step": 5542 }, { "epoch": 0.8943207486285899, "grad_norm": 1.4325281491268405, "learning_rate": 3.022469752747631e-08, "loss": 0.5915, "step": 5543 }, { "epoch": 0.8944820909970959, "grad_norm": 1.996293089705543, "learning_rate": 3.013341649125212e-08, "loss": 0.6313, "step": 5544 }, { "epoch": 0.8946434333656018, "grad_norm": 1.996975757999969, "learning_rate": 3.004226921885683e-08, "loss": 0.4116, "step": 5545 }, { "epoch": 0.8948047757341078, "grad_norm": 1.4461088718672646, "learning_rate": 2.995125573623875e-08, "loss": 0.5193, "step": 5546 }, { "epoch": 0.8949661181026137, "grad_norm": 1.598302360780857, "learning_rate": 2.986037606930808e-08, "loss": 0.4419, "step": 5547 }, { "epoch": 0.8951274604711197, "grad_norm": 3.6339320928918735, "learning_rate": 2.9769630243936806e-08, "loss": 0.5809, "step": 5548 }, { "epoch": 0.8952888028396256, "grad_norm": 1.4153594923819819, "learning_rate": 2.9679018285958812e-08, "loss": 0.5409, "step": 5549 }, { "epoch": 0.8954501452081317, "grad_norm": 1.6946817094723976, "learning_rate": 2.9588540221170077e-08, "loss": 0.4656, "step": 5550 }, { "epoch": 0.8956114875766377, "grad_norm": 1.687663670832485, "learning_rate": 2.949819607532833e-08, "loss": 0.3366, "step": 5551 }, { "epoch": 0.8957728299451436, "grad_norm": 1.3996993184767095, "learning_rate": 2.9407985874152996e-08, "loss": 0.3479, "step": 5552 }, { "epoch": 0.8959341723136496, "grad_norm": 1.3085142196856352, "learning_rate": 2.9317909643325645e-08, "loss": 0.2394, "step": 5553 }, { "epoch": 0.8960955146821555, "grad_norm": 1.5609914745510995, "learning_rate": 2.922796740848965e-08, "loss": 0.4167, "step": 5554 }, { "epoch": 0.8962568570506615, "grad_norm": 1.568429301640072, "learning_rate": 2.9138159195250088e-08, "loss": 0.5272, "step": 5555 }, { "epoch": 0.8964181994191674, "grad_norm": 1.4906952271622875, "learning_rate": 2.9048485029174007e-08, "loss": 0.4221, "step": 5556 }, { "epoch": 0.8965795417876734, "grad_norm": 1.8634211649635348, "learning_rate": 2.895894493579043e-08, "loss": 0.4024, "step": 5557 }, { "epoch": 0.8967408841561794, "grad_norm": 1.6972241149672578, "learning_rate": 2.88695389405898e-08, "loss": 0.4039, "step": 5558 }, { "epoch": 0.8969022265246854, "grad_norm": 1.3704836104594924, "learning_rate": 2.8780267069024754e-08, "loss": 0.577, "step": 5559 }, { "epoch": 0.8970635688931914, "grad_norm": 1.2243305479025441, "learning_rate": 2.8691129346509635e-08, "loss": 0.578, "step": 5560 }, { "epoch": 0.8972249112616973, "grad_norm": 1.240078701551956, "learning_rate": 2.860212579842053e-08, "loss": 0.4427, "step": 5561 }, { "epoch": 0.8973862536302033, "grad_norm": 1.7969924224965474, "learning_rate": 2.8513256450095558e-08, "loss": 0.4657, "step": 5562 }, { "epoch": 0.8975475959987093, "grad_norm": 1.4684819727704548, "learning_rate": 2.842452132683426e-08, "loss": 0.4106, "step": 5563 }, { "epoch": 0.8977089383672152, "grad_norm": 1.9360357104152737, "learning_rate": 2.8335920453898375e-08, "loss": 0.7557, "step": 5564 }, { "epoch": 0.8978702807357212, "grad_norm": 2.400503753404915, "learning_rate": 2.824745385651095e-08, "loss": 0.7656, "step": 5565 }, { "epoch": 0.8980316231042271, "grad_norm": 1.3988940809562735, "learning_rate": 2.815912155985728e-08, "loss": 0.4802, "step": 5566 }, { "epoch": 0.8981929654727331, "grad_norm": 1.612194369427723, "learning_rate": 2.8070923589084193e-08, "loss": 0.4227, "step": 5567 }, { "epoch": 0.8983543078412392, "grad_norm": 1.5838185704538192, "learning_rate": 2.7982859969300166e-08, "loss": 0.5126, "step": 5568 }, { "epoch": 0.8985156502097451, "grad_norm": 1.5702721130339696, "learning_rate": 2.7894930725575695e-08, "loss": 0.431, "step": 5569 }, { "epoch": 0.8986769925782511, "grad_norm": 1.3201014333101833, "learning_rate": 2.780713588294292e-08, "loss": 0.3848, "step": 5570 }, { "epoch": 0.898838334946757, "grad_norm": 1.4180021255810817, "learning_rate": 2.7719475466395582e-08, "loss": 0.4319, "step": 5571 }, { "epoch": 0.898999677315263, "grad_norm": 2.7553609266132417, "learning_rate": 2.7631949500889263e-08, "loss": 0.4689, "step": 5572 }, { "epoch": 0.8991610196837689, "grad_norm": 1.070435621322201, "learning_rate": 2.7544558011341312e-08, "loss": 0.3999, "step": 5573 }, { "epoch": 0.8993223620522749, "grad_norm": 1.7954835894861685, "learning_rate": 2.7457301022630774e-08, "loss": 0.6287, "step": 5574 }, { "epoch": 0.8994837044207808, "grad_norm": 1.6981048089695416, "learning_rate": 2.7370178559598388e-08, "loss": 0.3549, "step": 5575 }, { "epoch": 0.8996450467892869, "grad_norm": 2.4117635996472573, "learning_rate": 2.7283190647046537e-08, "loss": 0.7398, "step": 5576 }, { "epoch": 0.8998063891577929, "grad_norm": 0.954490527672024, "learning_rate": 2.7196337309739414e-08, "loss": 0.2901, "step": 5577 }, { "epoch": 0.8999677315262988, "grad_norm": 1.6143875577632425, "learning_rate": 2.710961857240268e-08, "loss": 0.3013, "step": 5578 }, { "epoch": 0.9001290738948048, "grad_norm": 1.4420964105838496, "learning_rate": 2.7023034459723927e-08, "loss": 0.5085, "step": 5579 }, { "epoch": 0.9002904162633107, "grad_norm": 1.683724028819631, "learning_rate": 2.693658499635243e-08, "loss": 0.4143, "step": 5580 }, { "epoch": 0.9004517586318167, "grad_norm": 2.0040540138777576, "learning_rate": 2.6850270206898894e-08, "loss": 0.4937, "step": 5581 }, { "epoch": 0.9006131010003227, "grad_norm": 2.0406516010601075, "learning_rate": 2.6764090115935834e-08, "loss": 0.487, "step": 5582 }, { "epoch": 0.9007744433688286, "grad_norm": 1.6946229015589749, "learning_rate": 2.667804474799745e-08, "loss": 0.5272, "step": 5583 }, { "epoch": 0.9009357857373346, "grad_norm": 1.4004923465659913, "learning_rate": 2.659213412757949e-08, "loss": 0.396, "step": 5584 }, { "epoch": 0.9010971281058406, "grad_norm": 1.9839400167626804, "learning_rate": 2.650635827913955e-08, "loss": 0.4824, "step": 5585 }, { "epoch": 0.9012584704743466, "grad_norm": 1.7641419790658137, "learning_rate": 2.6420717227096433e-08, "loss": 0.5051, "step": 5586 }, { "epoch": 0.9014198128428526, "grad_norm": 1.369880813579335, "learning_rate": 2.6335210995831025e-08, "loss": 0.4017, "step": 5587 }, { "epoch": 0.9015811552113585, "grad_norm": 1.5218232821659465, "learning_rate": 2.6249839609685688e-08, "loss": 0.4264, "step": 5588 }, { "epoch": 0.9017424975798645, "grad_norm": 0.9704703775372889, "learning_rate": 2.61646030929642e-08, "loss": 0.3102, "step": 5589 }, { "epoch": 0.9019038399483704, "grad_norm": 1.4014578653698377, "learning_rate": 2.607950146993215e-08, "loss": 0.5596, "step": 5590 }, { "epoch": 0.9020651823168764, "grad_norm": 1.4565113766268805, "learning_rate": 2.5994534764816666e-08, "loss": 0.3837, "step": 5591 }, { "epoch": 0.9022265246853823, "grad_norm": 1.5058563667791776, "learning_rate": 2.590970300180645e-08, "loss": 0.4452, "step": 5592 }, { "epoch": 0.9023878670538884, "grad_norm": 1.1621809224134656, "learning_rate": 2.5825006205051903e-08, "loss": 0.2977, "step": 5593 }, { "epoch": 0.9025492094223944, "grad_norm": 1.2959217106814185, "learning_rate": 2.5740444398664795e-08, "loss": 0.3641, "step": 5594 }, { "epoch": 0.9027105517909003, "grad_norm": 1.390325621193387, "learning_rate": 2.5656017606718593e-08, "loss": 0.3503, "step": 5595 }, { "epoch": 0.9028718941594063, "grad_norm": 1.4972522523015601, "learning_rate": 2.557172585324835e-08, "loss": 0.4979, "step": 5596 }, { "epoch": 0.9030332365279122, "grad_norm": 1.4489509766988622, "learning_rate": 2.5487569162250645e-08, "loss": 0.3946, "step": 5597 }, { "epoch": 0.9031945788964182, "grad_norm": 1.6250746049627833, "learning_rate": 2.5403547557683645e-08, "loss": 0.5425, "step": 5598 }, { "epoch": 0.9033559212649241, "grad_norm": 1.59459865638794, "learning_rate": 2.531966106346689e-08, "loss": 0.4576, "step": 5599 }, { "epoch": 0.9035172636334301, "grad_norm": 1.7693510321661627, "learning_rate": 2.523590970348166e-08, "loss": 0.4601, "step": 5600 }, { "epoch": 0.9036786060019361, "grad_norm": 1.557584584053371, "learning_rate": 2.5152293501570722e-08, "loss": 0.4876, "step": 5601 }, { "epoch": 0.9038399483704421, "grad_norm": 2.244457093078341, "learning_rate": 2.506881248153819e-08, "loss": 0.641, "step": 5602 }, { "epoch": 0.9040012907389481, "grad_norm": 1.9246269929829338, "learning_rate": 2.4985466667150067e-08, "loss": 0.581, "step": 5603 }, { "epoch": 0.904162633107454, "grad_norm": 1.5942231766129784, "learning_rate": 2.4902256082133366e-08, "loss": 0.4419, "step": 5604 }, { "epoch": 0.90432397547596, "grad_norm": 1.4632904362021033, "learning_rate": 2.4819180750177026e-08, "loss": 0.4796, "step": 5605 }, { "epoch": 0.904485317844466, "grad_norm": 1.3503374048967516, "learning_rate": 2.4736240694931242e-08, "loss": 0.3686, "step": 5606 }, { "epoch": 0.9046466602129719, "grad_norm": 1.6089877060728144, "learning_rate": 2.465343594000785e-08, "loss": 0.4713, "step": 5607 }, { "epoch": 0.9048080025814779, "grad_norm": 1.7087130085890478, "learning_rate": 2.4570766508980156e-08, "loss": 0.5718, "step": 5608 }, { "epoch": 0.9049693449499838, "grad_norm": 1.8037570024276448, "learning_rate": 2.4488232425382727e-08, "loss": 0.5322, "step": 5609 }, { "epoch": 0.9051306873184899, "grad_norm": 2.021457955745235, "learning_rate": 2.4405833712711822e-08, "loss": 0.4601, "step": 5610 }, { "epoch": 0.9052920296869958, "grad_norm": 1.1684691719033398, "learning_rate": 2.4323570394425175e-08, "loss": 0.3488, "step": 5611 }, { "epoch": 0.9054533720555018, "grad_norm": 1.8480389382194555, "learning_rate": 2.424144249394172e-08, "loss": 0.5304, "step": 5612 }, { "epoch": 0.9056147144240078, "grad_norm": 1.8597552127142032, "learning_rate": 2.4159450034642148e-08, "loss": 0.4892, "step": 5613 }, { "epoch": 0.9057760567925137, "grad_norm": 1.7043105732316797, "learning_rate": 2.407759303986845e-08, "loss": 0.4479, "step": 5614 }, { "epoch": 0.9059373991610197, "grad_norm": 1.7798072593792116, "learning_rate": 2.3995871532923984e-08, "loss": 0.5968, "step": 5615 }, { "epoch": 0.9060987415295256, "grad_norm": 1.713246498851352, "learning_rate": 2.3914285537073754e-08, "loss": 0.4523, "step": 5616 }, { "epoch": 0.9062600838980316, "grad_norm": 1.6654799607559274, "learning_rate": 2.3832835075543844e-08, "loss": 0.3472, "step": 5617 }, { "epoch": 0.9064214262665375, "grad_norm": 1.5272532872876339, "learning_rate": 2.37515201715221e-08, "loss": 0.4895, "step": 5618 }, { "epoch": 0.9065827686350436, "grad_norm": 1.7303421973114892, "learning_rate": 2.3670340848157554e-08, "loss": 0.418, "step": 5619 }, { "epoch": 0.9067441110035496, "grad_norm": 1.4209555702914534, "learning_rate": 2.3589297128560837e-08, "loss": 0.489, "step": 5620 }, { "epoch": 0.9069054533720555, "grad_norm": 1.6843623201533475, "learning_rate": 2.3508389035803768e-08, "loss": 0.4622, "step": 5621 }, { "epoch": 0.9070667957405615, "grad_norm": 1.6987939764061095, "learning_rate": 2.3427616592919585e-08, "loss": 0.3735, "step": 5622 }, { "epoch": 0.9072281381090674, "grad_norm": 1.3487657485622286, "learning_rate": 2.3346979822903067e-08, "loss": 0.5337, "step": 5623 }, { "epoch": 0.9073894804775734, "grad_norm": 2.0575556589711987, "learning_rate": 2.3266478748710294e-08, "loss": 0.2768, "step": 5624 }, { "epoch": 0.9075508228460794, "grad_norm": 1.9915419785588404, "learning_rate": 2.31861133932586e-08, "loss": 0.6282, "step": 5625 }, { "epoch": 0.9077121652145853, "grad_norm": 1.3060924252465589, "learning_rate": 2.310588377942674e-08, "loss": 0.5796, "step": 5626 }, { "epoch": 0.9078735075830913, "grad_norm": 1.6530204812019702, "learning_rate": 2.302578993005505e-08, "loss": 0.5302, "step": 5627 }, { "epoch": 0.9080348499515973, "grad_norm": 1.7155361993317575, "learning_rate": 2.29458318679448e-08, "loss": 0.4181, "step": 5628 }, { "epoch": 0.9081961923201033, "grad_norm": 2.3116058476741275, "learning_rate": 2.2866009615858883e-08, "loss": 0.5691, "step": 5629 }, { "epoch": 0.9083575346886092, "grad_norm": 1.5050159670214067, "learning_rate": 2.2786323196521573e-08, "loss": 0.489, "step": 5630 }, { "epoch": 0.9085188770571152, "grad_norm": 1.5030039272599678, "learning_rate": 2.2706772632618388e-08, "loss": 0.5047, "step": 5631 }, { "epoch": 0.9086802194256212, "grad_norm": 1.0819950579708428, "learning_rate": 2.2627357946795988e-08, "loss": 0.3743, "step": 5632 }, { "epoch": 0.9088415617941271, "grad_norm": 1.6754452384075302, "learning_rate": 2.2548079161662627e-08, "loss": 0.4922, "step": 5633 }, { "epoch": 0.9090029041626331, "grad_norm": 2.1317291302908115, "learning_rate": 2.24689362997878e-08, "loss": 0.52, "step": 5634 }, { "epoch": 0.909164246531139, "grad_norm": 1.5832935713075216, "learning_rate": 2.2389929383702154e-08, "loss": 0.2794, "step": 5635 }, { "epoch": 0.9093255888996451, "grad_norm": 1.6108168282068647, "learning_rate": 2.2311058435897868e-08, "loss": 0.4114, "step": 5636 }, { "epoch": 0.909486931268151, "grad_norm": 1.4479915224480497, "learning_rate": 2.2232323478828252e-08, "loss": 0.3986, "step": 5637 }, { "epoch": 0.909648273636657, "grad_norm": 1.2326437966453505, "learning_rate": 2.215372453490788e-08, "loss": 0.4162, "step": 5638 }, { "epoch": 0.909809616005163, "grad_norm": 1.6319000030670923, "learning_rate": 2.207526162651274e-08, "loss": 0.4967, "step": 5639 }, { "epoch": 0.9099709583736689, "grad_norm": 1.8307512118304143, "learning_rate": 2.1996934775980082e-08, "loss": 0.5379, "step": 5640 }, { "epoch": 0.9101323007421749, "grad_norm": 1.5007919763899606, "learning_rate": 2.1918744005608113e-08, "loss": 0.2597, "step": 5641 }, { "epoch": 0.9102936431106808, "grad_norm": 1.5997991763683534, "learning_rate": 2.184068933765687e-08, "loss": 0.4999, "step": 5642 }, { "epoch": 0.9104549854791868, "grad_norm": 2.319240746401828, "learning_rate": 2.1762770794347128e-08, "loss": 0.5287, "step": 5643 }, { "epoch": 0.9106163278476928, "grad_norm": 1.850591928276088, "learning_rate": 2.1684988397861204e-08, "loss": 0.5842, "step": 5644 }, { "epoch": 0.9107776702161988, "grad_norm": 1.3688914552517637, "learning_rate": 2.1607342170342434e-08, "loss": 0.4694, "step": 5645 }, { "epoch": 0.9109390125847048, "grad_norm": 1.7577937824524292, "learning_rate": 2.1529832133895588e-08, "loss": 0.5732, "step": 5646 }, { "epoch": 0.9111003549532107, "grad_norm": 2.4184668402315967, "learning_rate": 2.145245831058662e-08, "loss": 0.4589, "step": 5647 }, { "epoch": 0.9112616973217167, "grad_norm": 1.7941795366495672, "learning_rate": 2.137522072244258e-08, "loss": 0.4002, "step": 5648 }, { "epoch": 0.9114230396902226, "grad_norm": 1.5512147388861721, "learning_rate": 2.1298119391451874e-08, "loss": 0.5198, "step": 5649 }, { "epoch": 0.9115843820587286, "grad_norm": 1.6787815223398654, "learning_rate": 2.1221154339564174e-08, "loss": 0.539, "step": 5650 }, { "epoch": 0.9117457244272346, "grad_norm": 1.4617341669228903, "learning_rate": 2.1144325588690114e-08, "loss": 0.4757, "step": 5651 }, { "epoch": 0.9119070667957405, "grad_norm": 1.1753528612991708, "learning_rate": 2.1067633160701648e-08, "loss": 0.4004, "step": 5652 }, { "epoch": 0.9120684091642466, "grad_norm": 1.7172823101385641, "learning_rate": 2.099107707743203e-08, "loss": 0.5099, "step": 5653 }, { "epoch": 0.9122297515327525, "grad_norm": 1.8214509866441166, "learning_rate": 2.09146573606756e-08, "loss": 0.5052, "step": 5654 }, { "epoch": 0.9123910939012585, "grad_norm": 1.3022076916119028, "learning_rate": 2.0838374032187855e-08, "loss": 0.3688, "step": 5655 }, { "epoch": 0.9125524362697645, "grad_norm": 1.657545770441836, "learning_rate": 2.076222711368547e-08, "loss": 0.4333, "step": 5656 }, { "epoch": 0.9127137786382704, "grad_norm": 1.4018332739720287, "learning_rate": 2.0686216626846385e-08, "loss": 0.5676, "step": 5657 }, { "epoch": 0.9128751210067764, "grad_norm": 1.3052242339988027, "learning_rate": 2.0610342593309516e-08, "loss": 0.517, "step": 5658 }, { "epoch": 0.9130364633752823, "grad_norm": 1.7507456825657413, "learning_rate": 2.053460503467508e-08, "loss": 0.558, "step": 5659 }, { "epoch": 0.9131978057437883, "grad_norm": 1.404260329830784, "learning_rate": 2.0459003972504496e-08, "loss": 0.4452, "step": 5660 }, { "epoch": 0.9133591481122942, "grad_norm": 1.4261580295982537, "learning_rate": 2.038353942832005e-08, "loss": 0.439, "step": 5661 }, { "epoch": 0.9135204904808003, "grad_norm": 1.9371463699023632, "learning_rate": 2.0308211423605505e-08, "loss": 0.3914, "step": 5662 }, { "epoch": 0.9136818328493063, "grad_norm": 1.5674420592141614, "learning_rate": 2.0233019979805533e-08, "loss": 0.513, "step": 5663 }, { "epoch": 0.9138431752178122, "grad_norm": 1.8672977558606871, "learning_rate": 2.0157965118325903e-08, "loss": 0.5628, "step": 5664 }, { "epoch": 0.9140045175863182, "grad_norm": 1.902310976220027, "learning_rate": 2.008304686053375e-08, "loss": 0.4692, "step": 5665 }, { "epoch": 0.9141658599548241, "grad_norm": 1.631671780833775, "learning_rate": 2.0008265227757003e-08, "loss": 0.475, "step": 5666 }, { "epoch": 0.9143272023233301, "grad_norm": 1.552704432936543, "learning_rate": 1.993362024128492e-08, "loss": 0.4164, "step": 5667 }, { "epoch": 0.914488544691836, "grad_norm": 1.2841919873014815, "learning_rate": 1.9859111922367887e-08, "loss": 0.3732, "step": 5668 }, { "epoch": 0.914649887060342, "grad_norm": 1.7209288917715506, "learning_rate": 1.9784740292217107e-08, "loss": 0.5646, "step": 5669 }, { "epoch": 0.914811229428848, "grad_norm": 1.8684016792983993, "learning_rate": 1.9710505372005136e-08, "loss": 0.5347, "step": 5670 }, { "epoch": 0.914972571797354, "grad_norm": 1.1540606106851887, "learning_rate": 1.963640718286552e-08, "loss": 0.5252, "step": 5671 }, { "epoch": 0.91513391416586, "grad_norm": 2.099120809799498, "learning_rate": 1.9562445745892776e-08, "loss": 0.499, "step": 5672 }, { "epoch": 0.9152952565343659, "grad_norm": 1.4727980649140766, "learning_rate": 1.948862108214283e-08, "loss": 0.3058, "step": 5673 }, { "epoch": 0.9154565989028719, "grad_norm": 1.5461358510396153, "learning_rate": 1.9414933212632157e-08, "loss": 0.5426, "step": 5674 }, { "epoch": 0.9156179412713779, "grad_norm": 1.7548166473771172, "learning_rate": 1.934138215833869e-08, "loss": 0.5732, "step": 5675 }, { "epoch": 0.9157792836398838, "grad_norm": 1.126196913320098, "learning_rate": 1.9267967940201357e-08, "loss": 0.282, "step": 5676 }, { "epoch": 0.9159406260083898, "grad_norm": 1.96571661367231, "learning_rate": 1.919469057911993e-08, "loss": 0.6875, "step": 5677 }, { "epoch": 0.9161019683768957, "grad_norm": 1.238671851902495, "learning_rate": 1.9121550095955508e-08, "loss": 0.4541, "step": 5678 }, { "epoch": 0.9162633107454018, "grad_norm": 1.3025856219098655, "learning_rate": 1.904854651152993e-08, "loss": 0.4262, "step": 5679 }, { "epoch": 0.9164246531139078, "grad_norm": 1.707978847600493, "learning_rate": 1.8975679846626293e-08, "loss": 0.5927, "step": 5680 }, { "epoch": 0.9165859954824137, "grad_norm": 1.7706872393612716, "learning_rate": 1.8902950121988558e-08, "loss": 0.5121, "step": 5681 }, { "epoch": 0.9167473378509197, "grad_norm": 1.8714919015075773, "learning_rate": 1.883035735832178e-08, "loss": 0.4367, "step": 5682 }, { "epoch": 0.9169086802194256, "grad_norm": 1.5025884229693616, "learning_rate": 1.875790157629209e-08, "loss": 0.4289, "step": 5683 }, { "epoch": 0.9170700225879316, "grad_norm": 1.6656138750746117, "learning_rate": 1.868558279652638e-08, "loss": 0.5453, "step": 5684 }, { "epoch": 0.9172313649564375, "grad_norm": 1.314600897952269, "learning_rate": 1.8613401039612788e-08, "loss": 0.4779, "step": 5685 }, { "epoch": 0.9173927073249435, "grad_norm": 1.3987541905699163, "learning_rate": 1.8541356326100433e-08, "loss": 0.2992, "step": 5686 }, { "epoch": 0.9175540496934494, "grad_norm": 1.5761659756801774, "learning_rate": 1.8469448676499134e-08, "loss": 0.4643, "step": 5687 }, { "epoch": 0.9177153920619555, "grad_norm": 1.367736662517789, "learning_rate": 1.8397678111280125e-08, "loss": 0.4222, "step": 5688 }, { "epoch": 0.9178767344304615, "grad_norm": 2.0922299461246934, "learning_rate": 1.8326044650875227e-08, "loss": 0.368, "step": 5689 }, { "epoch": 0.9180380767989674, "grad_norm": 1.3296966229673817, "learning_rate": 1.825454831567752e-08, "loss": 0.3791, "step": 5690 }, { "epoch": 0.9181994191674734, "grad_norm": 1.9454144064320322, "learning_rate": 1.8183189126040887e-08, "loss": 0.597, "step": 5691 }, { "epoch": 0.9183607615359793, "grad_norm": 1.4567596755170444, "learning_rate": 1.811196710228008e-08, "loss": 0.3087, "step": 5692 }, { "epoch": 0.9185221039044853, "grad_norm": 1.165129204817675, "learning_rate": 1.8040882264671043e-08, "loss": 0.452, "step": 5693 }, { "epoch": 0.9186834462729913, "grad_norm": 1.6791202740693263, "learning_rate": 1.7969934633450533e-08, "loss": 0.5395, "step": 5694 }, { "epoch": 0.9188447886414972, "grad_norm": 1.1137608080852226, "learning_rate": 1.7899124228816177e-08, "loss": 0.4238, "step": 5695 }, { "epoch": 0.9190061310100033, "grad_norm": 1.3281327640082117, "learning_rate": 1.7828451070926675e-08, "loss": 0.3577, "step": 5696 }, { "epoch": 0.9191674733785092, "grad_norm": 1.7898606431514343, "learning_rate": 1.7757915179901604e-08, "loss": 0.6186, "step": 5697 }, { "epoch": 0.9193288157470152, "grad_norm": 2.2357015873153587, "learning_rate": 1.7687516575821293e-08, "loss": 0.5182, "step": 5698 }, { "epoch": 0.9194901581155212, "grad_norm": 1.4845506263032915, "learning_rate": 1.7617255278727484e-08, "loss": 0.4205, "step": 5699 }, { "epoch": 0.9196515004840271, "grad_norm": 1.6374357924358365, "learning_rate": 1.7547131308622175e-08, "loss": 0.3967, "step": 5700 }, { "epoch": 0.9198128428525331, "grad_norm": 1.6888074401754825, "learning_rate": 1.7477144685468792e-08, "loss": 0.4038, "step": 5701 }, { "epoch": 0.919974185221039, "grad_norm": 1.457592812489858, "learning_rate": 1.7407295429191336e-08, "loss": 0.4099, "step": 5702 }, { "epoch": 0.920135527589545, "grad_norm": 1.7524664391584186, "learning_rate": 1.7337583559674795e-08, "loss": 0.571, "step": 5703 }, { "epoch": 0.9202968699580509, "grad_norm": 1.3034284992954441, "learning_rate": 1.7268009096765235e-08, "loss": 0.3462, "step": 5704 }, { "epoch": 0.920458212326557, "grad_norm": 1.4099784591057454, "learning_rate": 1.7198572060269312e-08, "loss": 0.4255, "step": 5705 }, { "epoch": 0.920619554695063, "grad_norm": 1.9310113481635602, "learning_rate": 1.712927246995466e-08, "loss": 0.5947, "step": 5706 }, { "epoch": 0.9207808970635689, "grad_norm": 1.5618388493793667, "learning_rate": 1.7060110345549937e-08, "loss": 0.639, "step": 5707 }, { "epoch": 0.9209422394320749, "grad_norm": 1.976296086506867, "learning_rate": 1.6991085706744456e-08, "loss": 0.4464, "step": 5708 }, { "epoch": 0.9211035818005808, "grad_norm": 1.4153493010658924, "learning_rate": 1.6922198573188497e-08, "loss": 0.3917, "step": 5709 }, { "epoch": 0.9212649241690868, "grad_norm": 1.701337243505148, "learning_rate": 1.685344896449309e-08, "loss": 0.5742, "step": 5710 }, { "epoch": 0.9214262665375927, "grad_norm": 1.517238348158885, "learning_rate": 1.6784836900230304e-08, "loss": 0.4705, "step": 5711 }, { "epoch": 0.9215876089060987, "grad_norm": 1.375040227128175, "learning_rate": 1.6716362399933014e-08, "loss": 0.2094, "step": 5712 }, { "epoch": 0.9217489512746048, "grad_norm": 1.676960208077248, "learning_rate": 1.6648025483094686e-08, "loss": 0.492, "step": 5713 }, { "epoch": 0.9219102936431107, "grad_norm": 1.5893579439064829, "learning_rate": 1.6579826169169974e-08, "loss": 0.4984, "step": 5714 }, { "epoch": 0.9220716360116167, "grad_norm": 1.6864536714950824, "learning_rate": 1.6511764477573964e-08, "loss": 0.5322, "step": 5715 }, { "epoch": 0.9222329783801226, "grad_norm": 1.9377452941175932, "learning_rate": 1.644384042768293e-08, "loss": 0.6512, "step": 5716 }, { "epoch": 0.9223943207486286, "grad_norm": 1.4287539076063331, "learning_rate": 1.6376054038833798e-08, "loss": 0.3661, "step": 5717 }, { "epoch": 0.9225556631171346, "grad_norm": 1.9193788150412812, "learning_rate": 1.6308405330324294e-08, "loss": 0.6212, "step": 5718 }, { "epoch": 0.9227170054856405, "grad_norm": 1.7528097892708985, "learning_rate": 1.62408943214129e-08, "loss": 0.4468, "step": 5719 }, { "epoch": 0.9228783478541465, "grad_norm": 1.308217373141327, "learning_rate": 1.617352103131914e-08, "loss": 0.4012, "step": 5720 }, { "epoch": 0.9230396902226524, "grad_norm": 1.3897843552453661, "learning_rate": 1.6106285479222936e-08, "loss": 0.4615, "step": 5721 }, { "epoch": 0.9232010325911585, "grad_norm": 2.2993423807163964, "learning_rate": 1.603918768426543e-08, "loss": 0.5881, "step": 5722 }, { "epoch": 0.9233623749596644, "grad_norm": 1.393654384669013, "learning_rate": 1.5972227665548232e-08, "loss": 0.2466, "step": 5723 }, { "epoch": 0.9235237173281704, "grad_norm": 1.338169308338194, "learning_rate": 1.5905405442133867e-08, "loss": 0.3789, "step": 5724 }, { "epoch": 0.9236850596966764, "grad_norm": 1.179116571977595, "learning_rate": 1.5838721033045565e-08, "loss": 0.3344, "step": 5725 }, { "epoch": 0.9238464020651823, "grad_norm": 1.6982659836736709, "learning_rate": 1.577217445726736e-08, "loss": 0.5724, "step": 5726 }, { "epoch": 0.9240077444336883, "grad_norm": 1.4418904801469081, "learning_rate": 1.5705765733744103e-08, "loss": 0.3855, "step": 5727 }, { "epoch": 0.9241690868021942, "grad_norm": 2.0670498456845876, "learning_rate": 1.5639494881381275e-08, "loss": 0.5908, "step": 5728 }, { "epoch": 0.9243304291707002, "grad_norm": 1.4946328941673357, "learning_rate": 1.557336191904518e-08, "loss": 0.3933, "step": 5729 }, { "epoch": 0.9244917715392061, "grad_norm": 1.6282703796029723, "learning_rate": 1.5507366865562976e-08, "loss": 0.3374, "step": 5730 }, { "epoch": 0.9246531139077122, "grad_norm": 1.542864368626159, "learning_rate": 1.5441509739722246e-08, "loss": 0.5085, "step": 5731 }, { "epoch": 0.9248144562762182, "grad_norm": 1.201336807464702, "learning_rate": 1.5375790560271608e-08, "loss": 0.4516, "step": 5732 }, { "epoch": 0.9249757986447241, "grad_norm": 1.6270497672330433, "learning_rate": 1.531020934592031e-08, "loss": 0.4595, "step": 5733 }, { "epoch": 0.9251371410132301, "grad_norm": 1.178844175733203, "learning_rate": 1.524476611533837e-08, "loss": 0.4601, "step": 5734 }, { "epoch": 0.925298483381736, "grad_norm": 1.2524639640695154, "learning_rate": 1.5179460887156436e-08, "loss": 0.5277, "step": 5735 }, { "epoch": 0.925459825750242, "grad_norm": 1.1733773837074977, "learning_rate": 1.5114293679965862e-08, "loss": 0.423, "step": 5736 }, { "epoch": 0.925621168118748, "grad_norm": 1.412740015213676, "learning_rate": 1.5049264512318805e-08, "loss": 0.3591, "step": 5737 }, { "epoch": 0.9257825104872539, "grad_norm": 1.683495042921064, "learning_rate": 1.4984373402728012e-08, "loss": 0.5292, "step": 5738 }, { "epoch": 0.92594385285576, "grad_norm": 1.2852972353328613, "learning_rate": 1.491962036966704e-08, "loss": 0.5449, "step": 5739 }, { "epoch": 0.9261051952242659, "grad_norm": 1.009349921892261, "learning_rate": 1.4855005431570145e-08, "loss": 0.2712, "step": 5740 }, { "epoch": 0.9262665375927719, "grad_norm": 1.623500939489057, "learning_rate": 1.4790528606832108e-08, "loss": 0.3777, "step": 5741 }, { "epoch": 0.9264278799612778, "grad_norm": 1.8569251197463101, "learning_rate": 1.472618991380853e-08, "loss": 0.6032, "step": 5742 }, { "epoch": 0.9265892223297838, "grad_norm": 1.1458866049202925, "learning_rate": 1.4661989370815697e-08, "loss": 0.4455, "step": 5743 }, { "epoch": 0.9267505646982898, "grad_norm": 1.9215532979529293, "learning_rate": 1.4597926996130382e-08, "loss": 0.4542, "step": 5744 }, { "epoch": 0.9269119070667957, "grad_norm": 1.2883711839587746, "learning_rate": 1.4534002807990387e-08, "loss": 0.336, "step": 5745 }, { "epoch": 0.9270732494353017, "grad_norm": 2.1123015271011956, "learning_rate": 1.4470216824593762e-08, "loss": 0.6094, "step": 5746 }, { "epoch": 0.9272345918038076, "grad_norm": 2.0411482042391484, "learning_rate": 1.4406569064099482e-08, "loss": 0.5184, "step": 5747 }, { "epoch": 0.9273959341723137, "grad_norm": 1.2838378915535384, "learning_rate": 1.4343059544627113e-08, "loss": 0.2911, "step": 5748 }, { "epoch": 0.9275572765408197, "grad_norm": 1.6262658397422147, "learning_rate": 1.427968828425674e-08, "loss": 0.5229, "step": 5749 }, { "epoch": 0.9277186189093256, "grad_norm": 1.5773109375940306, "learning_rate": 1.4216455301029273e-08, "loss": 0.5659, "step": 5750 }, { "epoch": 0.9278799612778316, "grad_norm": 1.4713975297957675, "learning_rate": 1.4153360612946197e-08, "loss": 0.3981, "step": 5751 }, { "epoch": 0.9280413036463375, "grad_norm": 1.866539240075389, "learning_rate": 1.4090404237969478e-08, "loss": 0.4911, "step": 5752 }, { "epoch": 0.9282026460148435, "grad_norm": 1.616826334996196, "learning_rate": 1.4027586194022057e-08, "loss": 0.5283, "step": 5753 }, { "epoch": 0.9283639883833494, "grad_norm": 1.4913855190045218, "learning_rate": 1.3964906498987018e-08, "loss": 0.2909, "step": 5754 }, { "epoch": 0.9285253307518554, "grad_norm": 1.5381839006501266, "learning_rate": 1.3902365170708474e-08, "loss": 0.3885, "step": 5755 }, { "epoch": 0.9286866731203615, "grad_norm": 2.3173532849796565, "learning_rate": 1.3839962226990908e-08, "loss": 0.4817, "step": 5756 }, { "epoch": 0.9288480154888674, "grad_norm": 1.3211346837068831, "learning_rate": 1.3777697685599554e-08, "loss": 0.4306, "step": 5757 }, { "epoch": 0.9290093578573734, "grad_norm": 2.01641688205498, "learning_rate": 1.371557156426012e-08, "loss": 0.4336, "step": 5758 }, { "epoch": 0.9291707002258793, "grad_norm": 1.8030370786708096, "learning_rate": 1.3653583880658958e-08, "loss": 0.4175, "step": 5759 }, { "epoch": 0.9293320425943853, "grad_norm": 1.4106789674183549, "learning_rate": 1.3591734652443066e-08, "loss": 0.5616, "step": 5760 }, { "epoch": 0.9294933849628912, "grad_norm": 1.5088097629546786, "learning_rate": 1.3530023897219967e-08, "loss": 0.5194, "step": 5761 }, { "epoch": 0.9296547273313972, "grad_norm": 1.4261364638049772, "learning_rate": 1.3468451632557665e-08, "loss": 0.4145, "step": 5762 }, { "epoch": 0.9298160696999032, "grad_norm": 1.0483747528901861, "learning_rate": 1.3407017875984971e-08, "loss": 0.4635, "step": 5763 }, { "epoch": 0.9299774120684091, "grad_norm": 1.3218023257647933, "learning_rate": 1.3345722644991065e-08, "loss": 0.3804, "step": 5764 }, { "epoch": 0.9301387544369152, "grad_norm": 1.4863284458155042, "learning_rate": 1.3284565957025762e-08, "loss": 0.5435, "step": 5765 }, { "epoch": 0.9303000968054211, "grad_norm": 1.422809419551123, "learning_rate": 1.3223547829499527e-08, "loss": 0.4491, "step": 5766 }, { "epoch": 0.9304614391739271, "grad_norm": 1.4343866551409816, "learning_rate": 1.3162668279783074e-08, "loss": 0.4504, "step": 5767 }, { "epoch": 0.930622781542433, "grad_norm": 1.5089178431589854, "learning_rate": 1.3101927325208151e-08, "loss": 0.473, "step": 5768 }, { "epoch": 0.930784123910939, "grad_norm": 1.5993684207591619, "learning_rate": 1.3041324983066592e-08, "loss": 0.5322, "step": 5769 }, { "epoch": 0.930945466279445, "grad_norm": 1.982144040336894, "learning_rate": 1.2980861270611043e-08, "loss": 0.588, "step": 5770 }, { "epoch": 0.9311068086479509, "grad_norm": 1.2408412139587672, "learning_rate": 1.2920536205054566e-08, "loss": 0.3521, "step": 5771 }, { "epoch": 0.9312681510164569, "grad_norm": 1.4066812489794795, "learning_rate": 1.2860349803570758e-08, "loss": 0.384, "step": 5772 }, { "epoch": 0.9314294933849628, "grad_norm": 1.397861767111064, "learning_rate": 1.2800302083293801e-08, "loss": 0.393, "step": 5773 }, { "epoch": 0.9315908357534689, "grad_norm": 1.648769688694768, "learning_rate": 1.2740393061318356e-08, "loss": 0.3926, "step": 5774 }, { "epoch": 0.9317521781219749, "grad_norm": 1.7691526702720122, "learning_rate": 1.2680622754699611e-08, "loss": 0.4967, "step": 5775 }, { "epoch": 0.9319135204904808, "grad_norm": 1.888509402145885, "learning_rate": 1.2620991180453289e-08, "loss": 0.4401, "step": 5776 }, { "epoch": 0.9320748628589868, "grad_norm": 1.6988174139910097, "learning_rate": 1.2561498355555477e-08, "loss": 0.5569, "step": 5777 }, { "epoch": 0.9322362052274927, "grad_norm": 2.00587827382011, "learning_rate": 1.2502144296942962e-08, "loss": 0.5624, "step": 5778 }, { "epoch": 0.9323975475959987, "grad_norm": 2.2021303569449, "learning_rate": 1.244292902151295e-08, "loss": 0.5728, "step": 5779 }, { "epoch": 0.9325588899645046, "grad_norm": 1.3559282826613461, "learning_rate": 1.2383852546123064e-08, "loss": 0.3094, "step": 5780 }, { "epoch": 0.9327202323330106, "grad_norm": 2.098275716514259, "learning_rate": 1.2324914887591575e-08, "loss": 0.6428, "step": 5781 }, { "epoch": 0.9328815747015167, "grad_norm": 1.6211447098776777, "learning_rate": 1.2266116062696951e-08, "loss": 0.5469, "step": 5782 }, { "epoch": 0.9330429170700226, "grad_norm": 1.3300427671597856, "learning_rate": 1.2207456088178469e-08, "loss": 0.4773, "step": 5783 }, { "epoch": 0.9332042594385286, "grad_norm": 1.8885083921699004, "learning_rate": 1.214893498073577e-08, "loss": 0.6153, "step": 5784 }, { "epoch": 0.9333656018070345, "grad_norm": 1.9144205186733412, "learning_rate": 1.2090552757028748e-08, "loss": 0.3874, "step": 5785 }, { "epoch": 0.9335269441755405, "grad_norm": 1.197513871958747, "learning_rate": 1.2032309433678001e-08, "loss": 0.4816, "step": 5786 }, { "epoch": 0.9336882865440465, "grad_norm": 1.2520261079698565, "learning_rate": 1.1974205027264594e-08, "loss": 0.4758, "step": 5787 }, { "epoch": 0.9338496289125524, "grad_norm": 1.2300668695927628, "learning_rate": 1.1916239554329854e-08, "loss": 0.3789, "step": 5788 }, { "epoch": 0.9340109712810584, "grad_norm": 1.588234576784026, "learning_rate": 1.1858413031375692e-08, "loss": 0.3553, "step": 5789 }, { "epoch": 0.9341723136495643, "grad_norm": 1.2850918739219255, "learning_rate": 1.1800725474864437e-08, "loss": 0.4446, "step": 5790 }, { "epoch": 0.9343336560180704, "grad_norm": 1.3037239674120633, "learning_rate": 1.1743176901218844e-08, "loss": 0.492, "step": 5791 }, { "epoch": 0.9344949983865763, "grad_norm": 1.625436430791944, "learning_rate": 1.168576732682225e-08, "loss": 0.5152, "step": 5792 }, { "epoch": 0.9346563407550823, "grad_norm": 2.392203314870313, "learning_rate": 1.1628496768018082e-08, "loss": 0.5129, "step": 5793 }, { "epoch": 0.9348176831235883, "grad_norm": 1.4246583462104605, "learning_rate": 1.1571365241110465e-08, "loss": 0.3529, "step": 5794 }, { "epoch": 0.9349790254920942, "grad_norm": 1.77208782714207, "learning_rate": 1.1514372762363889e-08, "loss": 0.5356, "step": 5795 }, { "epoch": 0.9351403678606002, "grad_norm": 1.4769744298448113, "learning_rate": 1.1457519348003264e-08, "loss": 0.3471, "step": 5796 }, { "epoch": 0.9353017102291061, "grad_norm": 1.6346513532763458, "learning_rate": 1.1400805014213866e-08, "loss": 0.5606, "step": 5797 }, { "epoch": 0.9354630525976121, "grad_norm": 1.7040230019722769, "learning_rate": 1.1344229777141335e-08, "loss": 0.4817, "step": 5798 }, { "epoch": 0.9356243949661182, "grad_norm": 2.071457916672027, "learning_rate": 1.1287793652891841e-08, "loss": 0.5001, "step": 5799 }, { "epoch": 0.9357857373346241, "grad_norm": 1.3818129035458757, "learning_rate": 1.1231496657531925e-08, "loss": 0.4052, "step": 5800 }, { "epoch": 0.9359470797031301, "grad_norm": 1.394215224758686, "learning_rate": 1.1175338807088319e-08, "loss": 0.4169, "step": 5801 }, { "epoch": 0.936108422071636, "grad_norm": 1.3413638730873154, "learning_rate": 1.1119320117548514e-08, "loss": 0.4477, "step": 5802 }, { "epoch": 0.936269764440142, "grad_norm": 1.807682145413243, "learning_rate": 1.1063440604859975e-08, "loss": 0.5816, "step": 5803 }, { "epoch": 0.9364311068086479, "grad_norm": 1.6470718150367791, "learning_rate": 1.100770028493092e-08, "loss": 0.4129, "step": 5804 }, { "epoch": 0.9365924491771539, "grad_norm": 1.4603866925975098, "learning_rate": 1.0952099173629715e-08, "loss": 0.5398, "step": 5805 }, { "epoch": 0.9367537915456599, "grad_norm": 1.634678408758443, "learning_rate": 1.0896637286785082e-08, "loss": 0.3869, "step": 5806 }, { "epoch": 0.9369151339141658, "grad_norm": 1.7725655180708786, "learning_rate": 1.0841314640186228e-08, "loss": 0.449, "step": 5807 }, { "epoch": 0.9370764762826719, "grad_norm": 1.0138493788582057, "learning_rate": 1.0786131249582609e-08, "loss": 0.3761, "step": 5808 }, { "epoch": 0.9372378186511778, "grad_norm": 1.8190208076138394, "learning_rate": 1.0731087130684103e-08, "loss": 0.4409, "step": 5809 }, { "epoch": 0.9373991610196838, "grad_norm": 1.740662390480258, "learning_rate": 1.0676182299161063e-08, "loss": 0.4243, "step": 5810 }, { "epoch": 0.9375605033881897, "grad_norm": 2.0510345593186106, "learning_rate": 1.062141677064382e-08, "loss": 0.3103, "step": 5811 }, { "epoch": 0.9377218457566957, "grad_norm": 1.5692564796409998, "learning_rate": 1.056679056072346e-08, "loss": 0.3942, "step": 5812 }, { "epoch": 0.9378831881252017, "grad_norm": 1.7364011047611092, "learning_rate": 1.0512303684951152e-08, "loss": 0.394, "step": 5813 }, { "epoch": 0.9380445304937076, "grad_norm": 1.6949838064764895, "learning_rate": 1.0457956158838544e-08, "loss": 0.4998, "step": 5814 }, { "epoch": 0.9382058728622136, "grad_norm": 1.6758822608506503, "learning_rate": 1.0403747997857537e-08, "loss": 0.3567, "step": 5815 }, { "epoch": 0.9383672152307196, "grad_norm": 1.2725054366439665, "learning_rate": 1.0349679217440232e-08, "loss": 0.3191, "step": 5816 }, { "epoch": 0.9385285575992256, "grad_norm": 1.8582422348182512, "learning_rate": 1.0295749832979317e-08, "loss": 0.5443, "step": 5817 }, { "epoch": 0.9386898999677316, "grad_norm": 1.4636173251360742, "learning_rate": 1.024195985982773e-08, "loss": 0.4242, "step": 5818 }, { "epoch": 0.9388512423362375, "grad_norm": 1.9978016453935457, "learning_rate": 1.0188309313298448e-08, "loss": 0.5325, "step": 5819 }, { "epoch": 0.9390125847047435, "grad_norm": 1.8251198089151657, "learning_rate": 1.0134798208665085e-08, "loss": 0.3964, "step": 5820 }, { "epoch": 0.9391739270732494, "grad_norm": 1.667208496075946, "learning_rate": 1.0081426561161398e-08, "loss": 0.4889, "step": 5821 }, { "epoch": 0.9393352694417554, "grad_norm": 1.5844609946363999, "learning_rate": 1.0028194385981515e-08, "loss": 0.3847, "step": 5822 }, { "epoch": 0.9394966118102613, "grad_norm": 1.5915340651288927, "learning_rate": 9.97510169827981e-09, "loss": 0.4668, "step": 5823 }, { "epoch": 0.9396579541787673, "grad_norm": 1.5845521786429253, "learning_rate": 9.922148513170969e-09, "loss": 0.5047, "step": 5824 }, { "epoch": 0.9398192965472734, "grad_norm": 1.4741146242922794, "learning_rate": 9.869334845729883e-09, "loss": 0.4814, "step": 5825 }, { "epoch": 0.9399806389157793, "grad_norm": 1.9919455587040882, "learning_rate": 9.816660710991853e-09, "loss": 0.4843, "step": 5826 }, { "epoch": 0.9401419812842853, "grad_norm": 1.7762551128144741, "learning_rate": 9.764126123952387e-09, "loss": 0.5229, "step": 5827 }, { "epoch": 0.9403033236527912, "grad_norm": 1.5093177831952937, "learning_rate": 9.711731099567355e-09, "loss": 0.3385, "step": 5828 }, { "epoch": 0.9404646660212972, "grad_norm": 1.4912063808409015, "learning_rate": 9.659475652752658e-09, "loss": 0.3471, "step": 5829 }, { "epoch": 0.9406260083898031, "grad_norm": 1.6105771065853116, "learning_rate": 9.607359798384784e-09, "loss": 0.5997, "step": 5830 }, { "epoch": 0.9407873507583091, "grad_norm": 1.8819025143702688, "learning_rate": 9.555383551300256e-09, "loss": 0.4576, "step": 5831 }, { "epoch": 0.9409486931268151, "grad_norm": 1.7858257081828321, "learning_rate": 9.503546926295846e-09, "loss": 0.4833, "step": 5832 }, { "epoch": 0.941110035495321, "grad_norm": 1.1686149519144502, "learning_rate": 9.451849938128752e-09, "loss": 0.3754, "step": 5833 }, { "epoch": 0.9412713778638271, "grad_norm": 1.7279062694895686, "learning_rate": 9.400292601516257e-09, "loss": 0.4869, "step": 5834 }, { "epoch": 0.941432720232333, "grad_norm": 1.5251011298995547, "learning_rate": 9.348874931136008e-09, "loss": 0.3174, "step": 5835 }, { "epoch": 0.941594062600839, "grad_norm": 1.6526012896314268, "learning_rate": 9.29759694162574e-09, "loss": 0.5926, "step": 5836 }, { "epoch": 0.941755404969345, "grad_norm": 1.7150261506069302, "learning_rate": 9.246458647583554e-09, "loss": 0.3928, "step": 5837 }, { "epoch": 0.9419167473378509, "grad_norm": 1.4462641701626437, "learning_rate": 9.195460063567806e-09, "loss": 0.5478, "step": 5838 }, { "epoch": 0.9420780897063569, "grad_norm": 1.5219902010638569, "learning_rate": 9.14460120409688e-09, "loss": 0.4175, "step": 5839 }, { "epoch": 0.9422394320748628, "grad_norm": 1.1872042237399456, "learning_rate": 9.093882083649585e-09, "loss": 0.4513, "step": 5840 }, { "epoch": 0.9424007744433688, "grad_norm": 1.618711186831076, "learning_rate": 9.04330271666498e-09, "loss": 0.3963, "step": 5841 }, { "epoch": 0.9425621168118749, "grad_norm": 1.3315171399075165, "learning_rate": 8.992863117542048e-09, "loss": 0.5229, "step": 5842 }, { "epoch": 0.9427234591803808, "grad_norm": 1.4474549780977628, "learning_rate": 8.942563300640305e-09, "loss": 0.3883, "step": 5843 }, { "epoch": 0.9428848015488868, "grad_norm": 1.3491705748368117, "learning_rate": 8.892403280279237e-09, "loss": 0.3739, "step": 5844 }, { "epoch": 0.9430461439173927, "grad_norm": 1.5778077061520435, "learning_rate": 8.842383070738758e-09, "loss": 0.4527, "step": 5845 }, { "epoch": 0.9432074862858987, "grad_norm": 1.7088449301368795, "learning_rate": 8.792502686258752e-09, "loss": 0.4868, "step": 5846 }, { "epoch": 0.9433688286544046, "grad_norm": 1.8687350830707554, "learning_rate": 8.742762141039472e-09, "loss": 0.462, "step": 5847 }, { "epoch": 0.9435301710229106, "grad_norm": 1.4009264741455614, "learning_rate": 8.693161449241204e-09, "loss": 0.4411, "step": 5848 }, { "epoch": 0.9436915133914165, "grad_norm": 1.2313255596696895, "learning_rate": 8.643700624984595e-09, "loss": 0.5385, "step": 5849 }, { "epoch": 0.9438528557599225, "grad_norm": 2.104688737015053, "learning_rate": 8.594379682350327e-09, "loss": 0.6516, "step": 5850 }, { "epoch": 0.9440141981284286, "grad_norm": 1.6713029827658499, "learning_rate": 8.545198635379392e-09, "loss": 0.6777, "step": 5851 }, { "epoch": 0.9441755404969345, "grad_norm": 1.6712173166552953, "learning_rate": 8.496157498072809e-09, "loss": 0.6912, "step": 5852 }, { "epoch": 0.9443368828654405, "grad_norm": 1.7764472461042935, "learning_rate": 8.447256284391856e-09, "loss": 0.5396, "step": 5853 }, { "epoch": 0.9444982252339464, "grad_norm": 1.5434332426097024, "learning_rate": 8.398495008257955e-09, "loss": 0.4266, "step": 5854 }, { "epoch": 0.9446595676024524, "grad_norm": 1.4205626415390407, "learning_rate": 8.349873683552721e-09, "loss": 0.4308, "step": 5855 }, { "epoch": 0.9448209099709584, "grad_norm": 1.6362700291071448, "learning_rate": 8.301392324117917e-09, "loss": 0.4042, "step": 5856 }, { "epoch": 0.9449822523394643, "grad_norm": 1.7979016232294005, "learning_rate": 8.25305094375539e-09, "loss": 0.3118, "step": 5857 }, { "epoch": 0.9451435947079703, "grad_norm": 4.008448500112202, "learning_rate": 8.20484955622719e-09, "loss": 0.5079, "step": 5858 }, { "epoch": 0.9453049370764763, "grad_norm": 1.3244893295532214, "learning_rate": 8.156788175255557e-09, "loss": 0.4414, "step": 5859 }, { "epoch": 0.9454662794449823, "grad_norm": 1.226018408352427, "learning_rate": 8.10886681452283e-09, "loss": 0.4172, "step": 5860 }, { "epoch": 0.9456276218134883, "grad_norm": 1.8175267854839274, "learning_rate": 8.061085487671481e-09, "loss": 0.5591, "step": 5861 }, { "epoch": 0.9457889641819942, "grad_norm": 1.536073444671884, "learning_rate": 8.013444208304132e-09, "loss": 0.4322, "step": 5862 }, { "epoch": 0.9459503065505002, "grad_norm": 1.461725114475179, "learning_rate": 7.965942989983432e-09, "loss": 0.4165, "step": 5863 }, { "epoch": 0.9461116489190061, "grad_norm": 1.6540508425092963, "learning_rate": 7.918581846232397e-09, "loss": 0.398, "step": 5864 }, { "epoch": 0.9462729912875121, "grad_norm": 1.5602192827308212, "learning_rate": 7.871360790533966e-09, "loss": 0.4574, "step": 5865 }, { "epoch": 0.946434333656018, "grad_norm": 1.37073992649616, "learning_rate": 7.82427983633116e-09, "loss": 0.3566, "step": 5866 }, { "epoch": 0.946595676024524, "grad_norm": 2.040425163169819, "learning_rate": 7.777338997027372e-09, "loss": 0.6048, "step": 5867 }, { "epoch": 0.9467570183930301, "grad_norm": 1.8215843636271791, "learning_rate": 7.730538285985798e-09, "loss": 0.3906, "step": 5868 }, { "epoch": 0.946918360761536, "grad_norm": 1.5158228794189708, "learning_rate": 7.683877716529952e-09, "loss": 0.5279, "step": 5869 }, { "epoch": 0.947079703130042, "grad_norm": 1.3674524540978161, "learning_rate": 7.63735730194337e-09, "loss": 0.3495, "step": 5870 }, { "epoch": 0.9472410454985479, "grad_norm": 1.5465671733940523, "learning_rate": 7.590977055469682e-09, "loss": 0.5148, "step": 5871 }, { "epoch": 0.9474023878670539, "grad_norm": 1.3674619126812901, "learning_rate": 7.54473699031266e-09, "loss": 0.3428, "step": 5872 }, { "epoch": 0.9475637302355598, "grad_norm": 1.5476708724877906, "learning_rate": 7.498637119636164e-09, "loss": 0.609, "step": 5873 }, { "epoch": 0.9477250726040658, "grad_norm": 1.92696537653448, "learning_rate": 7.452677456564138e-09, "loss": 0.5682, "step": 5874 }, { "epoch": 0.9478864149725718, "grad_norm": 3.6783924456053785, "learning_rate": 7.406858014180506e-09, "loss": 0.552, "step": 5875 }, { "epoch": 0.9480477573410777, "grad_norm": 1.8801018921653438, "learning_rate": 7.3611788055293885e-09, "loss": 0.5352, "step": 5876 }, { "epoch": 0.9482090997095838, "grad_norm": 1.6559147045541158, "learning_rate": 7.3156398436149955e-09, "loss": 0.3563, "step": 5877 }, { "epoch": 0.9483704420780897, "grad_norm": 1.5454803499066663, "learning_rate": 7.270241141401567e-09, "loss": 0.5101, "step": 5878 }, { "epoch": 0.9485317844465957, "grad_norm": 1.8353164596423968, "learning_rate": 7.2249827118134325e-09, "loss": 0.4858, "step": 5879 }, { "epoch": 0.9486931268151017, "grad_norm": 1.3419065028109733, "learning_rate": 7.17986456773495e-09, "loss": 0.3844, "step": 5880 }, { "epoch": 0.9488544691836076, "grad_norm": 1.56958028698693, "learning_rate": 7.1348867220105136e-09, "loss": 0.3923, "step": 5881 }, { "epoch": 0.9490158115521136, "grad_norm": 1.8105321922653366, "learning_rate": 7.090049187444714e-09, "loss": 0.376, "step": 5882 }, { "epoch": 0.9491771539206195, "grad_norm": 1.519980317665962, "learning_rate": 7.045351976802061e-09, "loss": 0.375, "step": 5883 }, { "epoch": 0.9493384962891255, "grad_norm": 1.5070316486743738, "learning_rate": 7.000795102807211e-09, "loss": 0.5294, "step": 5884 }, { "epoch": 0.9494998386576315, "grad_norm": 1.5544291526033784, "learning_rate": 6.956378578144795e-09, "loss": 0.4141, "step": 5885 }, { "epoch": 0.9496611810261375, "grad_norm": 1.1524344198472403, "learning_rate": 6.912102415459475e-09, "loss": 0.4934, "step": 5886 }, { "epoch": 0.9498225233946435, "grad_norm": 1.2214429398777487, "learning_rate": 6.867966627356114e-09, "loss": 0.5528, "step": 5887 }, { "epoch": 0.9499838657631494, "grad_norm": 1.363296158935871, "learning_rate": 6.8239712263993836e-09, "loss": 0.3742, "step": 5888 }, { "epoch": 0.9501452081316554, "grad_norm": 1.325528242868675, "learning_rate": 6.780116225114152e-09, "loss": 0.4106, "step": 5889 }, { "epoch": 0.9503065505001613, "grad_norm": 2.1322665790920103, "learning_rate": 6.736401635985267e-09, "loss": 0.5714, "step": 5890 }, { "epoch": 0.9504678928686673, "grad_norm": 1.2082148800810557, "learning_rate": 6.692827471457607e-09, "loss": 0.5268, "step": 5891 }, { "epoch": 0.9506292352371732, "grad_norm": 1.9155758849874964, "learning_rate": 6.649393743936027e-09, "loss": 0.5149, "step": 5892 }, { "epoch": 0.9507905776056792, "grad_norm": 1.3641887802975785, "learning_rate": 6.606100465785525e-09, "loss": 0.4873, "step": 5893 }, { "epoch": 0.9509519199741853, "grad_norm": 1.1625541284736658, "learning_rate": 6.562947649330852e-09, "loss": 0.4628, "step": 5894 }, { "epoch": 0.9511132623426912, "grad_norm": 1.4203214437373108, "learning_rate": 6.519935306857239e-09, "loss": 0.4344, "step": 5895 }, { "epoch": 0.9512746047111972, "grad_norm": 1.2887124713424791, "learning_rate": 6.477063450609388e-09, "loss": 0.3879, "step": 5896 }, { "epoch": 0.9514359470797031, "grad_norm": 1.5435842324650908, "learning_rate": 6.434332092792427e-09, "loss": 0.442, "step": 5897 }, { "epoch": 0.9515972894482091, "grad_norm": 1.3085325767680358, "learning_rate": 6.391741245571236e-09, "loss": 0.4287, "step": 5898 }, { "epoch": 0.951758631816715, "grad_norm": 1.6916565038190974, "learning_rate": 6.349290921070782e-09, "loss": 0.5121, "step": 5899 }, { "epoch": 0.951919974185221, "grad_norm": 1.1310728724787633, "learning_rate": 6.306981131376122e-09, "loss": 0.4893, "step": 5900 }, { "epoch": 0.952081316553727, "grad_norm": 1.2859222077472032, "learning_rate": 6.264811888532062e-09, "loss": 0.3044, "step": 5901 }, { "epoch": 0.952242658922233, "grad_norm": 1.4806033755164887, "learning_rate": 6.222783204543558e-09, "loss": 0.4492, "step": 5902 }, { "epoch": 0.952404001290739, "grad_norm": 1.486120703281015, "learning_rate": 6.180895091375648e-09, "loss": 0.5091, "step": 5903 }, { "epoch": 0.952565343659245, "grad_norm": 1.038504601330176, "learning_rate": 6.139147560953128e-09, "loss": 0.4254, "step": 5904 }, { "epoch": 0.9527266860277509, "grad_norm": 1.6674607372093917, "learning_rate": 6.097540625160935e-09, "loss": 0.4779, "step": 5905 }, { "epoch": 0.9528880283962569, "grad_norm": 1.9816994840098554, "learning_rate": 6.056074295843872e-09, "loss": 0.5718, "step": 5906 }, { "epoch": 0.9530493707647628, "grad_norm": 1.6951862059632812, "learning_rate": 6.014748584806828e-09, "loss": 0.3663, "step": 5907 }, { "epoch": 0.9532107131332688, "grad_norm": 1.917634581377618, "learning_rate": 5.973563503814616e-09, "loss": 0.4897, "step": 5908 }, { "epoch": 0.9533720555017747, "grad_norm": 1.5033904541114487, "learning_rate": 5.932519064591968e-09, "loss": 0.4137, "step": 5909 }, { "epoch": 0.9535333978702807, "grad_norm": 1.1012946269836121, "learning_rate": 5.891615278823537e-09, "loss": 0.4864, "step": 5910 }, { "epoch": 0.9536947402387868, "grad_norm": 1.857375087208922, "learning_rate": 5.850852158154173e-09, "loss": 0.5206, "step": 5911 }, { "epoch": 0.9538560826072927, "grad_norm": 1.302961658513837, "learning_rate": 5.81022971418832e-09, "loss": 0.445, "step": 5912 }, { "epoch": 0.9540174249757987, "grad_norm": 1.920131605922813, "learning_rate": 5.769747958490722e-09, "loss": 0.4875, "step": 5913 }, { "epoch": 0.9541787673443046, "grad_norm": 1.7080028570941908, "learning_rate": 5.729406902585831e-09, "loss": 0.6082, "step": 5914 }, { "epoch": 0.9543401097128106, "grad_norm": 1.5612992823009861, "learning_rate": 5.689206557958126e-09, "loss": 0.5641, "step": 5915 }, { "epoch": 0.9545014520813165, "grad_norm": 1.8216038654294757, "learning_rate": 5.649146936052063e-09, "loss": 0.3757, "step": 5916 }, { "epoch": 0.9546627944498225, "grad_norm": 1.801876703076942, "learning_rate": 5.609228048271963e-09, "loss": 0.4682, "step": 5917 }, { "epoch": 0.9548241368183285, "grad_norm": 1.5840397647528799, "learning_rate": 5.569449905982238e-09, "loss": 0.4467, "step": 5918 }, { "epoch": 0.9549854791868344, "grad_norm": 1.3620531294258844, "learning_rate": 5.529812520506938e-09, "loss": 0.3734, "step": 5919 }, { "epoch": 0.9551468215553405, "grad_norm": 2.1780974902217762, "learning_rate": 5.490315903130371e-09, "loss": 0.471, "step": 5920 }, { "epoch": 0.9553081639238464, "grad_norm": 2.5517438909338477, "learning_rate": 5.4509600650965995e-09, "loss": 0.576, "step": 5921 }, { "epoch": 0.9554695062923524, "grad_norm": 1.9923038822661072, "learning_rate": 5.411745017609493e-09, "loss": 0.5451, "step": 5922 }, { "epoch": 0.9556308486608583, "grad_norm": 1.4930318470730997, "learning_rate": 5.372670771833121e-09, "loss": 0.4945, "step": 5923 }, { "epoch": 0.9557921910293643, "grad_norm": 1.153808748727111, "learning_rate": 5.333737338891309e-09, "loss": 0.4051, "step": 5924 }, { "epoch": 0.9559535333978703, "grad_norm": 1.849526940253972, "learning_rate": 5.2949447298678005e-09, "loss": 0.4305, "step": 5925 }, { "epoch": 0.9561148757663762, "grad_norm": 1.4034754614918732, "learning_rate": 5.256292955806207e-09, "loss": 0.4236, "step": 5926 }, { "epoch": 0.9562762181348822, "grad_norm": 2.327657870338, "learning_rate": 5.21778202771006e-09, "loss": 0.5503, "step": 5927 }, { "epoch": 0.9564375605033882, "grad_norm": 1.254722543419447, "learning_rate": 5.179411956542978e-09, "loss": 0.4649, "step": 5928 }, { "epoch": 0.9565989028718942, "grad_norm": 1.2314316146052446, "learning_rate": 5.1411827532282236e-09, "loss": 0.4129, "step": 5929 }, { "epoch": 0.9567602452404002, "grad_norm": 1.3433166071147853, "learning_rate": 5.103094428649091e-09, "loss": 0.3952, "step": 5930 }, { "epoch": 0.9569215876089061, "grad_norm": 1.6873236670024405, "learning_rate": 5.065146993648739e-09, "loss": 0.52, "step": 5931 }, { "epoch": 0.9570829299774121, "grad_norm": 1.4606911354615466, "learning_rate": 5.027340459030249e-09, "loss": 0.3568, "step": 5932 }, { "epoch": 0.957244272345918, "grad_norm": 2.553717472804735, "learning_rate": 4.989674835556568e-09, "loss": 0.5241, "step": 5933 }, { "epoch": 0.957405614714424, "grad_norm": 1.6454505797261263, "learning_rate": 4.952150133950506e-09, "loss": 0.5244, "step": 5934 }, { "epoch": 0.9575669570829299, "grad_norm": 1.375840320434635, "learning_rate": 4.9147663648946844e-09, "loss": 0.3575, "step": 5935 }, { "epoch": 0.9577282994514359, "grad_norm": 1.6757043496295978, "learning_rate": 4.877523539031814e-09, "loss": 0.4168, "step": 5936 }, { "epoch": 0.957889641819942, "grad_norm": 1.6773603064506595, "learning_rate": 4.840421666964356e-09, "loss": 0.4089, "step": 5937 }, { "epoch": 0.9580509841884479, "grad_norm": 1.4940046659955923, "learning_rate": 4.803460759254585e-09, "loss": 0.2988, "step": 5938 }, { "epoch": 0.9582123265569539, "grad_norm": 1.7744108377447307, "learning_rate": 4.766640826424751e-09, "loss": 0.5763, "step": 5939 }, { "epoch": 0.9583736689254598, "grad_norm": 1.4485512401773406, "learning_rate": 4.729961878956801e-09, "loss": 0.4811, "step": 5940 }, { "epoch": 0.9585350112939658, "grad_norm": 1.0827774064183162, "learning_rate": 4.6934239272928836e-09, "loss": 0.4405, "step": 5941 }, { "epoch": 0.9586963536624717, "grad_norm": 1.5578525090412594, "learning_rate": 4.657026981834622e-09, "loss": 0.4411, "step": 5942 }, { "epoch": 0.9588576960309777, "grad_norm": 1.3047067789263764, "learning_rate": 4.6207710529437285e-09, "loss": 0.4883, "step": 5943 }, { "epoch": 0.9590190383994837, "grad_norm": 2.342979202679668, "learning_rate": 4.584656150941779e-09, "loss": 0.5608, "step": 5944 }, { "epoch": 0.9591803807679897, "grad_norm": 1.776510458312318, "learning_rate": 4.548682286109995e-09, "loss": 0.4152, "step": 5945 }, { "epoch": 0.9593417231364957, "grad_norm": 1.6804045100505032, "learning_rate": 4.512849468689683e-09, "loss": 0.4691, "step": 5946 }, { "epoch": 0.9595030655050016, "grad_norm": 2.1913444523929146, "learning_rate": 4.477157708881851e-09, "loss": 0.5524, "step": 5947 }, { "epoch": 0.9596644078735076, "grad_norm": 1.097283405229158, "learning_rate": 4.441607016847482e-09, "loss": 0.4044, "step": 5948 }, { "epoch": 0.9598257502420136, "grad_norm": 1.444623710846299, "learning_rate": 4.406197402707202e-09, "loss": 0.3726, "step": 5949 }, { "epoch": 0.9599870926105195, "grad_norm": 1.6193385906241458, "learning_rate": 4.370928876541613e-09, "loss": 0.4891, "step": 5950 }, { "epoch": 0.9601484349790255, "grad_norm": 1.5633499885328006, "learning_rate": 4.33580144839113e-09, "loss": 0.4181, "step": 5951 }, { "epoch": 0.9603097773475314, "grad_norm": 2.0531967034440335, "learning_rate": 4.300815128256086e-09, "loss": 0.52, "step": 5952 }, { "epoch": 0.9604711197160374, "grad_norm": 1.4549238494399175, "learning_rate": 4.2659699260964576e-09, "loss": 0.4826, "step": 5953 }, { "epoch": 0.9606324620845434, "grad_norm": 2.0599445348513195, "learning_rate": 4.231265851832144e-09, "loss": 0.4902, "step": 5954 }, { "epoch": 0.9607938044530494, "grad_norm": 1.4975991904344164, "learning_rate": 4.196702915342909e-09, "loss": 0.3889, "step": 5955 }, { "epoch": 0.9609551468215554, "grad_norm": 2.0381555403708482, "learning_rate": 4.162281126468214e-09, "loss": 0.5487, "step": 5956 }, { "epoch": 0.9611164891900613, "grad_norm": 1.7980875319368392, "learning_rate": 4.1280004950075555e-09, "loss": 0.5232, "step": 5957 }, { "epoch": 0.9612778315585673, "grad_norm": 1.7303116772322293, "learning_rate": 4.093861030719903e-09, "loss": 0.4638, "step": 5958 }, { "epoch": 0.9614391739270732, "grad_norm": 1.64178919585032, "learning_rate": 4.059862743324427e-09, "loss": 0.5688, "step": 5959 }, { "epoch": 0.9616005162955792, "grad_norm": 1.4192559571026082, "learning_rate": 4.02600564249983e-09, "loss": 0.5109, "step": 5960 }, { "epoch": 0.9617618586640851, "grad_norm": 1.4605148438427742, "learning_rate": 3.99228973788468e-09, "loss": 0.3749, "step": 5961 }, { "epoch": 0.9619232010325912, "grad_norm": 1.3158902931487908, "learning_rate": 3.958715039077465e-09, "loss": 0.2394, "step": 5962 }, { "epoch": 0.9620845434010972, "grad_norm": 1.4748029771243476, "learning_rate": 3.925281555636317e-09, "loss": 0.336, "step": 5963 }, { "epoch": 0.9622458857696031, "grad_norm": 2.035769789280883, "learning_rate": 3.89198929707929e-09, "loss": 0.4088, "step": 5964 }, { "epoch": 0.9624072281381091, "grad_norm": 1.294537747630985, "learning_rate": 3.8588382728841904e-09, "loss": 0.4348, "step": 5965 }, { "epoch": 0.962568570506615, "grad_norm": 1.270733214431663, "learning_rate": 3.825828492488526e-09, "loss": 0.4554, "step": 5966 }, { "epoch": 0.962729912875121, "grad_norm": 1.5187014787874353, "learning_rate": 3.792959965289777e-09, "loss": 0.4786, "step": 5967 }, { "epoch": 0.962891255243627, "grad_norm": 1.5208931828397412, "learning_rate": 3.760232700645016e-09, "loss": 0.5139, "step": 5968 }, { "epoch": 0.9630525976121329, "grad_norm": 1.720182272878563, "learning_rate": 3.727646707871235e-09, "loss": 0.4144, "step": 5969 }, { "epoch": 0.9632139399806389, "grad_norm": 1.716755767781468, "learning_rate": 3.6952019962451787e-09, "loss": 0.4526, "step": 5970 }, { "epoch": 0.9633752823491449, "grad_norm": 1.4276322564422532, "learning_rate": 3.6628985750034036e-09, "loss": 0.4031, "step": 5971 }, { "epoch": 0.9635366247176509, "grad_norm": 1.2785036379598658, "learning_rate": 3.6307364533420536e-09, "loss": 0.3764, "step": 5972 }, { "epoch": 0.9636979670861568, "grad_norm": 2.3958994206698376, "learning_rate": 3.5987156404173604e-09, "loss": 0.587, "step": 5973 }, { "epoch": 0.9638593094546628, "grad_norm": 1.2662572811930353, "learning_rate": 3.5668361453450313e-09, "loss": 0.4977, "step": 5974 }, { "epoch": 0.9640206518231688, "grad_norm": 2.477215605579106, "learning_rate": 3.535097977200807e-09, "loss": 0.5229, "step": 5975 }, { "epoch": 0.9641819941916747, "grad_norm": 1.296772550649724, "learning_rate": 3.5035011450199046e-09, "loss": 0.4336, "step": 5976 }, { "epoch": 0.9643433365601807, "grad_norm": 1.4761234014041968, "learning_rate": 3.4720456577975732e-09, "loss": 0.2943, "step": 5977 }, { "epoch": 0.9645046789286866, "grad_norm": 1.467915886561049, "learning_rate": 3.4407315244886494e-09, "loss": 0.4781, "step": 5978 }, { "epoch": 0.9646660212971926, "grad_norm": 1.6753397895854552, "learning_rate": 3.4095587540077798e-09, "loss": 0.5612, "step": 5979 }, { "epoch": 0.9648273636656987, "grad_norm": 1.73169742132247, "learning_rate": 3.3785273552294767e-09, "loss": 0.5974, "step": 5980 }, { "epoch": 0.9649887060342046, "grad_norm": 1.7630564142766036, "learning_rate": 3.3476373369877854e-09, "loss": 0.4794, "step": 5981 }, { "epoch": 0.9651500484027106, "grad_norm": 1.3799891987668143, "learning_rate": 3.316888708076615e-09, "loss": 0.4974, "step": 5982 }, { "epoch": 0.9653113907712165, "grad_norm": 1.629761761649382, "learning_rate": 3.2862814772497973e-09, "loss": 0.4757, "step": 5983 }, { "epoch": 0.9654727331397225, "grad_norm": 4.102129797077405, "learning_rate": 3.2558156532205283e-09, "loss": 0.4707, "step": 5984 }, { "epoch": 0.9656340755082284, "grad_norm": 1.4329563979645044, "learning_rate": 3.225491244662093e-09, "loss": 0.4139, "step": 5985 }, { "epoch": 0.9657954178767344, "grad_norm": 1.4915147632989738, "learning_rate": 3.195308260207308e-09, "loss": 0.3442, "step": 5986 }, { "epoch": 0.9659567602452404, "grad_norm": 1.3171603253187096, "learning_rate": 3.1652667084489104e-09, "loss": 0.4123, "step": 5987 }, { "epoch": 0.9661181026137464, "grad_norm": 1.3934010432338408, "learning_rate": 3.135366597939226e-09, "loss": 0.3667, "step": 5988 }, { "epoch": 0.9662794449822524, "grad_norm": 1.5679376968783276, "learning_rate": 3.1056079371903354e-09, "loss": 0.3632, "step": 5989 }, { "epoch": 0.9664407873507583, "grad_norm": 1.6172082872022258, "learning_rate": 3.0759907346740167e-09, "loss": 0.4684, "step": 5990 }, { "epoch": 0.9666021297192643, "grad_norm": 1.5244691143566804, "learning_rate": 3.0465149988219695e-09, "loss": 0.3129, "step": 5991 }, { "epoch": 0.9667634720877702, "grad_norm": 1.5177869027178248, "learning_rate": 3.0171807380254265e-09, "loss": 0.4852, "step": 5992 }, { "epoch": 0.9669248144562762, "grad_norm": 1.304113753020284, "learning_rate": 2.987987960635374e-09, "loss": 0.5558, "step": 5993 }, { "epoch": 0.9670861568247822, "grad_norm": 2.032566172330821, "learning_rate": 2.9589366749625534e-09, "loss": 0.5519, "step": 5994 }, { "epoch": 0.9672474991932881, "grad_norm": 1.6176754864831455, "learning_rate": 2.9300268892774595e-09, "loss": 0.5817, "step": 5995 }, { "epoch": 0.9674088415617941, "grad_norm": 1.6000577230769972, "learning_rate": 2.901258611810231e-09, "loss": 0.6069, "step": 5996 }, { "epoch": 0.9675701839303001, "grad_norm": 1.4681507267815088, "learning_rate": 2.872631850750762e-09, "loss": 0.5631, "step": 5997 }, { "epoch": 0.9677315262988061, "grad_norm": 1.5435464670782229, "learning_rate": 2.844146614248699e-09, "loss": 0.5129, "step": 5998 }, { "epoch": 0.9678928686673121, "grad_norm": 1.54042271955541, "learning_rate": 2.8158029104132785e-09, "loss": 0.3773, "step": 5999 }, { "epoch": 0.968054211035818, "grad_norm": 1.3361083250530026, "learning_rate": 2.7876007473135453e-09, "loss": 0.2729, "step": 6000 }, { "epoch": 0.968215553404324, "grad_norm": 1.8331166124851064, "learning_rate": 2.759540132978244e-09, "loss": 0.5316, "step": 6001 }, { "epoch": 0.9683768957728299, "grad_norm": 1.9638336193947075, "learning_rate": 2.731621075395818e-09, "loss": 0.6838, "step": 6002 }, { "epoch": 0.9685382381413359, "grad_norm": 1.399416490274521, "learning_rate": 2.7038435825143535e-09, "loss": 0.4426, "step": 6003 }, { "epoch": 0.9686995805098418, "grad_norm": 1.9499617328312435, "learning_rate": 2.676207662241692e-09, "loss": 0.3914, "step": 6004 }, { "epoch": 0.9688609228783479, "grad_norm": 1.419869066434388, "learning_rate": 2.648713322445373e-09, "loss": 0.3135, "step": 6005 }, { "epoch": 0.9690222652468539, "grad_norm": 1.2673941127480841, "learning_rate": 2.62136057095258e-09, "loss": 0.4562, "step": 6006 }, { "epoch": 0.9691836076153598, "grad_norm": 1.6420194240401575, "learning_rate": 2.5941494155502506e-09, "loss": 0.4485, "step": 6007 }, { "epoch": 0.9693449499838658, "grad_norm": 1.604666925017956, "learning_rate": 2.567079863985022e-09, "loss": 0.4472, "step": 6008 }, { "epoch": 0.9695062923523717, "grad_norm": 1.6979024765576238, "learning_rate": 2.5401519239630633e-09, "loss": 0.4443, "step": 6009 }, { "epoch": 0.9696676347208777, "grad_norm": 2.030592474519913, "learning_rate": 2.5133656031504636e-09, "loss": 0.4836, "step": 6010 }, { "epoch": 0.9698289770893836, "grad_norm": 2.0189226956506245, "learning_rate": 2.486720909172846e-09, "loss": 0.5435, "step": 6011 }, { "epoch": 0.9699903194578896, "grad_norm": 1.53664041076097, "learning_rate": 2.460217849615531e-09, "loss": 0.3891, "step": 6012 }, { "epoch": 0.9701516618263956, "grad_norm": 1.5194564840167943, "learning_rate": 2.43385643202354e-09, "loss": 0.453, "step": 6013 }, { "epoch": 0.9703130041949016, "grad_norm": 1.8405951971934624, "learning_rate": 2.407636663901591e-09, "loss": 0.3927, "step": 6014 }, { "epoch": 0.9704743465634076, "grad_norm": 1.9182341289084934, "learning_rate": 2.3815585527139913e-09, "loss": 0.4638, "step": 6015 }, { "epoch": 0.9706356889319135, "grad_norm": 1.4925512699286005, "learning_rate": 2.3556221058848024e-09, "loss": 0.46, "step": 6016 }, { "epoch": 0.9707970313004195, "grad_norm": 1.3730213059802532, "learning_rate": 2.3298273307977846e-09, "loss": 0.4514, "step": 6017 }, { "epoch": 0.9709583736689255, "grad_norm": 1.5402679370045598, "learning_rate": 2.30417423479623e-09, "loss": 0.3918, "step": 6018 }, { "epoch": 0.9711197160374314, "grad_norm": 1.4620266694533972, "learning_rate": 2.278662825183297e-09, "loss": 0.5328, "step": 6019 }, { "epoch": 0.9712810584059374, "grad_norm": 1.6414237848041562, "learning_rate": 2.2532931092215657e-09, "loss": 0.4976, "step": 6020 }, { "epoch": 0.9714424007744433, "grad_norm": 1.7589933371013777, "learning_rate": 2.2280650941335357e-09, "loss": 0.6746, "step": 6021 }, { "epoch": 0.9716037431429493, "grad_norm": 1.9380017523034268, "learning_rate": 2.202978787101073e-09, "loss": 0.5432, "step": 6022 }, { "epoch": 0.9717650855114554, "grad_norm": 1.667440004541977, "learning_rate": 2.178034195266021e-09, "loss": 0.4504, "step": 6023 }, { "epoch": 0.9719264278799613, "grad_norm": 1.5165426877808927, "learning_rate": 2.1532313257296986e-09, "loss": 0.3865, "step": 6024 }, { "epoch": 0.9720877702484673, "grad_norm": 1.7976455363220032, "learning_rate": 2.1285701855530138e-09, "loss": 0.5684, "step": 6025 }, { "epoch": 0.9722491126169732, "grad_norm": 1.0195808252434437, "learning_rate": 2.104050781756683e-09, "loss": 0.4165, "step": 6026 }, { "epoch": 0.9724104549854792, "grad_norm": 1.4265464088226005, "learning_rate": 2.0796731213210128e-09, "loss": 0.435, "step": 6027 }, { "epoch": 0.9725717973539851, "grad_norm": 1.3321685720231482, "learning_rate": 2.0554372111859506e-09, "loss": 0.4572, "step": 6028 }, { "epoch": 0.9727331397224911, "grad_norm": 2.9077789222709525, "learning_rate": 2.031343058251034e-09, "loss": 0.5074, "step": 6029 }, { "epoch": 0.972894482090997, "grad_norm": 1.3549943692980675, "learning_rate": 2.0073906693755526e-09, "loss": 0.2934, "step": 6030 }, { "epoch": 0.9730558244595031, "grad_norm": 1.5676302800650435, "learning_rate": 1.9835800513783305e-09, "loss": 0.4995, "step": 6031 }, { "epoch": 0.9732171668280091, "grad_norm": 1.9391770641778745, "learning_rate": 1.9599112110379457e-09, "loss": 0.5543, "step": 6032 }, { "epoch": 0.973378509196515, "grad_norm": 1.5212364533447333, "learning_rate": 1.9363841550925076e-09, "loss": 0.4469, "step": 6033 }, { "epoch": 0.973539851565021, "grad_norm": 1.8694666276245098, "learning_rate": 1.912998890239881e-09, "loss": 0.4284, "step": 6034 }, { "epoch": 0.9737011939335269, "grad_norm": 1.3745966666550988, "learning_rate": 1.8897554231374623e-09, "loss": 0.4533, "step": 6035 }, { "epoch": 0.9738625363020329, "grad_norm": 1.8055253499104662, "learning_rate": 1.866653760402237e-09, "loss": 0.3274, "step": 6036 }, { "epoch": 0.9740238786705389, "grad_norm": 1.6170913004250942, "learning_rate": 1.8436939086109993e-09, "loss": 0.4963, "step": 6037 }, { "epoch": 0.9741852210390448, "grad_norm": 1.8988456758222847, "learning_rate": 1.8208758743000207e-09, "loss": 0.5461, "step": 6038 }, { "epoch": 0.9743465634075508, "grad_norm": 1.1784921615333435, "learning_rate": 1.7981996639652164e-09, "loss": 0.3934, "step": 6039 }, { "epoch": 0.9745079057760568, "grad_norm": 1.489678715495318, "learning_rate": 1.7756652840622e-09, "loss": 0.3917, "step": 6040 }, { "epoch": 0.9746692481445628, "grad_norm": 1.4730247623358923, "learning_rate": 1.753272741006173e-09, "loss": 0.4066, "step": 6041 }, { "epoch": 0.9748305905130688, "grad_norm": 1.766608512868963, "learning_rate": 1.7310220411719256e-09, "loss": 0.4603, "step": 6042 }, { "epoch": 0.9749919328815747, "grad_norm": 1.593718434937495, "learning_rate": 1.7089131908938348e-09, "loss": 0.4931, "step": 6043 }, { "epoch": 0.9751532752500807, "grad_norm": 1.5288188864532954, "learning_rate": 1.686946196466088e-09, "loss": 0.3558, "step": 6044 }, { "epoch": 0.9753146176185866, "grad_norm": 2.1649855425133886, "learning_rate": 1.6651210641422385e-09, "loss": 0.6727, "step": 6045 }, { "epoch": 0.9754759599870926, "grad_norm": 1.620563981231162, "learning_rate": 1.6434378001356497e-09, "loss": 0.5626, "step": 6046 }, { "epoch": 0.9756373023555985, "grad_norm": 1.29079551955794, "learning_rate": 1.6218964106191613e-09, "loss": 0.3184, "step": 6047 }, { "epoch": 0.9757986447241046, "grad_norm": 1.3485020291656513, "learning_rate": 1.6004969017252567e-09, "loss": 0.4525, "step": 6048 }, { "epoch": 0.9759599870926106, "grad_norm": 1.146886491847866, "learning_rate": 1.579239279546063e-09, "loss": 0.3393, "step": 6049 }, { "epoch": 0.9761213294611165, "grad_norm": 1.7826831473147697, "learning_rate": 1.5581235501334055e-09, "loss": 0.5246, "step": 6050 }, { "epoch": 0.9762826718296225, "grad_norm": 1.4372278868161332, "learning_rate": 1.5371497194984206e-09, "loss": 0.4037, "step": 6051 }, { "epoch": 0.9764440141981284, "grad_norm": 1.6130631786680378, "learning_rate": 1.5163177936122207e-09, "loss": 0.4301, "step": 6052 }, { "epoch": 0.9766053565666344, "grad_norm": 2.0844604749182896, "learning_rate": 1.4956277784052284e-09, "loss": 0.392, "step": 6053 }, { "epoch": 0.9767666989351403, "grad_norm": 1.8333960941438296, "learning_rate": 1.4750796797675658e-09, "loss": 0.4852, "step": 6054 }, { "epoch": 0.9769280413036463, "grad_norm": 1.5695815781327895, "learning_rate": 1.4546735035489977e-09, "loss": 0.5634, "step": 6055 }, { "epoch": 0.9770893836721523, "grad_norm": 1.7285360388978168, "learning_rate": 1.4344092555588772e-09, "loss": 0.5641, "step": 6056 }, { "epoch": 0.9772507260406583, "grad_norm": 1.4113246483737538, "learning_rate": 1.4142869415660896e-09, "loss": 0.4045, "step": 6057 }, { "epoch": 0.9774120684091643, "grad_norm": 1.5884770698129107, "learning_rate": 1.3943065672991638e-09, "loss": 0.3479, "step": 6058 }, { "epoch": 0.9775734107776702, "grad_norm": 1.4451632035960769, "learning_rate": 1.3744681384462164e-09, "loss": 0.4323, "step": 6059 }, { "epoch": 0.9777347531461762, "grad_norm": 1.3480810850274463, "learning_rate": 1.3547716606548965e-09, "loss": 0.3939, "step": 6060 }, { "epoch": 0.9778960955146822, "grad_norm": 1.4714976643042477, "learning_rate": 1.3352171395325517e-09, "loss": 0.3764, "step": 6061 }, { "epoch": 0.9780574378831881, "grad_norm": 1.5889449144517596, "learning_rate": 1.3158045806460073e-09, "loss": 0.2936, "step": 6062 }, { "epoch": 0.9782187802516941, "grad_norm": 1.7311865564282662, "learning_rate": 1.2965339895217308e-09, "loss": 0.3817, "step": 6063 }, { "epoch": 0.9783801226202, "grad_norm": 1.4724012401991151, "learning_rate": 1.277405371645779e-09, "loss": 0.2489, "step": 6064 }, { "epoch": 0.9785414649887061, "grad_norm": 1.64816571205264, "learning_rate": 1.258418732463795e-09, "loss": 0.5251, "step": 6065 }, { "epoch": 0.978702807357212, "grad_norm": 1.813843327966394, "learning_rate": 1.239574077380956e-09, "loss": 0.394, "step": 6066 }, { "epoch": 0.978864149725718, "grad_norm": 2.258430686719769, "learning_rate": 1.2208714117620256e-09, "loss": 0.4526, "step": 6067 }, { "epoch": 0.979025492094224, "grad_norm": 1.7839699440763563, "learning_rate": 1.2023107409314115e-09, "loss": 0.3804, "step": 6068 }, { "epoch": 0.9791868344627299, "grad_norm": 1.540078849012458, "learning_rate": 1.1838920701730537e-09, "loss": 0.4897, "step": 6069 }, { "epoch": 0.9793481768312359, "grad_norm": 1.591165054463252, "learning_rate": 1.165615404730369e-09, "loss": 0.489, "step": 6070 }, { "epoch": 0.9795095191997418, "grad_norm": 2.0289383866236768, "learning_rate": 1.1474807498066396e-09, "loss": 0.5964, "step": 6071 }, { "epoch": 0.9796708615682478, "grad_norm": 1.3719632687336767, "learning_rate": 1.1294881105643471e-09, "loss": 0.3388, "step": 6072 }, { "epoch": 0.9798322039367537, "grad_norm": 1.1867617269723714, "learning_rate": 1.1116374921257831e-09, "loss": 0.257, "step": 6073 }, { "epoch": 0.9799935463052598, "grad_norm": 1.2661752799640262, "learning_rate": 1.0939288995727157e-09, "loss": 0.4082, "step": 6074 }, { "epoch": 0.9801548886737658, "grad_norm": 1.7787824571748516, "learning_rate": 1.0763623379465569e-09, "loss": 0.3227, "step": 6075 }, { "epoch": 0.9803162310422717, "grad_norm": 1.5831590941410312, "learning_rate": 1.0589378122482507e-09, "loss": 0.5708, "step": 6076 }, { "epoch": 0.9804775734107777, "grad_norm": 2.2563691591197044, "learning_rate": 1.0416553274382733e-09, "loss": 0.6198, "step": 6077 }, { "epoch": 0.9806389157792836, "grad_norm": 1.3349294595976768, "learning_rate": 1.0245148884366339e-09, "loss": 0.386, "step": 6078 }, { "epoch": 0.9808002581477896, "grad_norm": 2.057130934476362, "learning_rate": 1.0075165001229846e-09, "loss": 0.3507, "step": 6079 }, { "epoch": 0.9809616005162956, "grad_norm": 1.4178507942340346, "learning_rate": 9.9066016733651e-10, "loss": 0.3746, "step": 6080 }, { "epoch": 0.9811229428848015, "grad_norm": 1.5609883434732856, "learning_rate": 9.739458948759826e-10, "loss": 0.4774, "step": 6081 }, { "epoch": 0.9812842852533075, "grad_norm": 1.672309319955159, "learning_rate": 9.573736874996519e-10, "loss": 0.542, "step": 6082 }, { "epoch": 0.9814456276218135, "grad_norm": 1.8445445143058319, "learning_rate": 9.409435499254104e-10, "loss": 0.4562, "step": 6083 }, { "epoch": 0.9816069699903195, "grad_norm": 1.7582978659332607, "learning_rate": 9.246554868306278e-10, "loss": 0.4249, "step": 6084 }, { "epoch": 0.9817683123588254, "grad_norm": 2.0621776906956706, "learning_rate": 9.085095028523171e-10, "loss": 0.4559, "step": 6085 }, { "epoch": 0.9819296547273314, "grad_norm": 1.5057308077816995, "learning_rate": 8.925056025869126e-10, "loss": 0.3962, "step": 6086 }, { "epoch": 0.9820909970958374, "grad_norm": 1.3332178194950197, "learning_rate": 8.766437905905477e-10, "loss": 0.4283, "step": 6087 }, { "epoch": 0.9822523394643433, "grad_norm": 1.87467470526204, "learning_rate": 8.609240713788324e-10, "loss": 0.5683, "step": 6088 }, { "epoch": 0.9824136818328493, "grad_norm": 1.8075624495780929, "learning_rate": 8.453464494269091e-10, "loss": 0.572, "step": 6089 }, { "epoch": 0.9825750242013552, "grad_norm": 2.0298075322580402, "learning_rate": 8.299109291695084e-10, "loss": 0.6233, "step": 6090 }, { "epoch": 0.9827363665698613, "grad_norm": 1.123611175737247, "learning_rate": 8.146175150008928e-10, "loss": 0.4868, "step": 6091 }, { "epoch": 0.9828977089383673, "grad_norm": 1.3464862997220024, "learning_rate": 7.994662112748019e-10, "loss": 0.5135, "step": 6092 }, { "epoch": 0.9830590513068732, "grad_norm": 1.6690923761044782, "learning_rate": 7.844570223046187e-10, "loss": 0.5114, "step": 6093 }, { "epoch": 0.9832203936753792, "grad_norm": 1.6350863015732624, "learning_rate": 7.695899523633143e-10, "loss": 0.4943, "step": 6094 }, { "epoch": 0.9833817360438851, "grad_norm": 1.6203392387104123, "learning_rate": 7.548650056831696e-10, "loss": 0.5136, "step": 6095 }, { "epoch": 0.9835430784123911, "grad_norm": 1.4849504660362989, "learning_rate": 7.402821864562203e-10, "loss": 0.4941, "step": 6096 }, { "epoch": 0.983704420780897, "grad_norm": 1.5573081926810688, "learning_rate": 7.258414988339789e-10, "loss": 0.4421, "step": 6097 }, { "epoch": 0.983865763149403, "grad_norm": 1.3674052037886453, "learning_rate": 7.115429469274903e-10, "loss": 0.4905, "step": 6098 }, { "epoch": 0.984027105517909, "grad_norm": 1.9969904428940088, "learning_rate": 6.973865348073315e-10, "loss": 0.6005, "step": 6099 }, { "epoch": 0.984188447886415, "grad_norm": 1.4874256307780018, "learning_rate": 6.833722665036124e-10, "loss": 0.4271, "step": 6100 }, { "epoch": 0.984349790254921, "grad_norm": 1.8626944139639818, "learning_rate": 6.695001460059191e-10, "loss": 0.4646, "step": 6101 }, { "epoch": 0.9845111326234269, "grad_norm": 1.2640386458929378, "learning_rate": 6.557701772635371e-10, "loss": 0.4077, "step": 6102 }, { "epoch": 0.9846724749919329, "grad_norm": 0.9124615334541886, "learning_rate": 6.421823641851176e-10, "loss": 0.2513, "step": 6103 }, { "epoch": 0.9848338173604388, "grad_norm": 1.7007488508735082, "learning_rate": 6.287367106389552e-10, "loss": 0.3001, "step": 6104 }, { "epoch": 0.9849951597289448, "grad_norm": 1.8139347448714171, "learning_rate": 6.154332204527101e-10, "loss": 0.3639, "step": 6105 }, { "epoch": 0.9851565020974508, "grad_norm": 1.7709270302910847, "learning_rate": 6.022718974137975e-10, "loss": 0.4082, "step": 6106 }, { "epoch": 0.9853178444659567, "grad_norm": 1.5856661259028313, "learning_rate": 5.892527452689977e-10, "loss": 0.5167, "step": 6107 }, { "epoch": 0.9854791868344628, "grad_norm": 1.359171139569215, "learning_rate": 5.763757677246795e-10, "loss": 0.367, "step": 6108 }, { "epoch": 0.9856405292029687, "grad_norm": 2.0467507128073015, "learning_rate": 5.636409684466881e-10, "loss": 0.5201, "step": 6109 }, { "epoch": 0.9858018715714747, "grad_norm": 1.5796802524358886, "learning_rate": 5.510483510605124e-10, "loss": 0.4245, "step": 6110 }, { "epoch": 0.9859632139399807, "grad_norm": 1.3620386007359389, "learning_rate": 5.385979191509515e-10, "loss": 0.4095, "step": 6111 }, { "epoch": 0.9861245563084866, "grad_norm": 1.6601886342199905, "learning_rate": 5.262896762625035e-10, "loss": 0.4655, "step": 6112 }, { "epoch": 0.9862858986769926, "grad_norm": 1.458450566983828, "learning_rate": 5.141236258991988e-10, "loss": 0.3215, "step": 6113 }, { "epoch": 0.9864472410454985, "grad_norm": 1.5242991822654284, "learning_rate": 5.020997715244335e-10, "loss": 0.392, "step": 6114 }, { "epoch": 0.9866085834140045, "grad_norm": 1.9288906660721226, "learning_rate": 4.902181165613029e-10, "loss": 0.5416, "step": 6115 }, { "epoch": 0.9867699257825104, "grad_norm": 1.2670970409387359, "learning_rate": 4.784786643922679e-10, "loss": 0.4298, "step": 6116 }, { "epoch": 0.9869312681510165, "grad_norm": 1.7961519859931423, "learning_rate": 4.668814183593772e-10, "loss": 0.5247, "step": 6117 }, { "epoch": 0.9870926105195225, "grad_norm": 1.3573953090785076, "learning_rate": 4.554263817642123e-10, "loss": 0.4434, "step": 6118 }, { "epoch": 0.9872539528880284, "grad_norm": 1.3891491720237965, "learning_rate": 4.441135578678312e-10, "loss": 0.396, "step": 6119 }, { "epoch": 0.9874152952565344, "grad_norm": 1.4887040306187929, "learning_rate": 4.329429498908244e-10, "loss": 0.5073, "step": 6120 }, { "epoch": 0.9875766376250403, "grad_norm": 1.4089015123047146, "learning_rate": 4.219145610133146e-10, "loss": 0.4705, "step": 6121 }, { "epoch": 0.9877379799935463, "grad_norm": 1.8715371261611695, "learning_rate": 4.1102839437490154e-10, "loss": 0.6465, "step": 6122 }, { "epoch": 0.9878993223620522, "grad_norm": 1.5645305409606527, "learning_rate": 4.0028445307471736e-10, "loss": 0.2851, "step": 6123 }, { "epoch": 0.9880606647305582, "grad_norm": 1.3209686905892315, "learning_rate": 3.896827401713154e-10, "loss": 0.3918, "step": 6124 }, { "epoch": 0.9882220070990642, "grad_norm": 1.3704914824323042, "learning_rate": 3.792232586830035e-10, "loss": 0.329, "step": 6125 }, { "epoch": 0.9883833494675702, "grad_norm": 1.4655388975574501, "learning_rate": 3.6890601158728885e-10, "loss": 0.4291, "step": 6126 }, { "epoch": 0.9885446918360762, "grad_norm": 1.5104659061517578, "learning_rate": 3.587310018213774e-10, "loss": 0.4814, "step": 6127 }, { "epoch": 0.9887060342045821, "grad_norm": 1.3471482798460424, "learning_rate": 3.4869823228200754e-10, "loss": 0.5363, "step": 6128 }, { "epoch": 0.9888673765730881, "grad_norm": 1.3848220533917857, "learning_rate": 3.388077058252281e-10, "loss": 0.4872, "step": 6129 }, { "epoch": 0.989028718941594, "grad_norm": 1.4735511064472, "learning_rate": 3.290594252668422e-10, "loss": 0.4342, "step": 6130 }, { "epoch": 0.9891900613101, "grad_norm": 2.0091212419427156, "learning_rate": 3.194533933820187e-10, "loss": 0.4016, "step": 6131 }, { "epoch": 0.989351403678606, "grad_norm": 1.5806214632633533, "learning_rate": 3.099896129053481e-10, "loss": 0.3832, "step": 6132 }, { "epoch": 0.9895127460471119, "grad_norm": 1.5461023886556367, "learning_rate": 3.0066808653111954e-10, "loss": 0.3941, "step": 6133 }, { "epoch": 0.989674088415618, "grad_norm": 1.9711201040446753, "learning_rate": 2.914888169129881e-10, "loss": 0.4867, "step": 6134 }, { "epoch": 0.989835430784124, "grad_norm": 2.0269579805178886, "learning_rate": 2.824518066641968e-10, "loss": 0.5169, "step": 6135 }, { "epoch": 0.9899967731526299, "grad_norm": 1.2935343078823385, "learning_rate": 2.735570583573543e-10, "loss": 0.3427, "step": 6136 }, { "epoch": 0.9901581155211359, "grad_norm": 1.0887830487916867, "learning_rate": 2.6480457452476847e-10, "loss": 0.3123, "step": 6137 }, { "epoch": 0.9903194578896418, "grad_norm": 1.349646207580111, "learning_rate": 2.561943576580572e-10, "loss": 0.3665, "step": 6138 }, { "epoch": 0.9904808002581478, "grad_norm": 1.6137218843429386, "learning_rate": 2.477264102084264e-10, "loss": 0.4808, "step": 6139 }, { "epoch": 0.9906421426266537, "grad_norm": 1.2289486169029489, "learning_rate": 2.3940073458661446e-10, "loss": 0.437, "step": 6140 }, { "epoch": 0.9908034849951597, "grad_norm": 0.9801607598095033, "learning_rate": 2.3121733316283643e-10, "loss": 0.2867, "step": 6141 }, { "epoch": 0.9909648273636656, "grad_norm": 1.498654795495007, "learning_rate": 2.2317620826667327e-10, "loss": 0.6777, "step": 6142 }, { "epoch": 0.9911261697321717, "grad_norm": 1.2007814842923792, "learning_rate": 2.152773621874049e-10, "loss": 0.4825, "step": 6143 }, { "epoch": 0.9912875121006777, "grad_norm": 1.4570551402406362, "learning_rate": 2.0752079717362147e-10, "loss": 0.4894, "step": 6144 }, { "epoch": 0.9914488544691836, "grad_norm": 2.0460507429510915, "learning_rate": 1.9990651543361213e-10, "loss": 0.415, "step": 6145 }, { "epoch": 0.9916101968376896, "grad_norm": 1.478944018446434, "learning_rate": 1.9243451913497631e-10, "loss": 0.4537, "step": 6146 }, { "epoch": 0.9917715392061955, "grad_norm": 1.3962918235504986, "learning_rate": 1.8510481040490133e-10, "loss": 0.4615, "step": 6147 }, { "epoch": 0.9919328815747015, "grad_norm": 2.1304919016034423, "learning_rate": 1.7791739132999584e-10, "loss": 0.5998, "step": 6148 }, { "epoch": 0.9920942239432075, "grad_norm": 2.0065059938243825, "learning_rate": 1.708722639565119e-10, "loss": 0.4488, "step": 6149 }, { "epoch": 0.9922555663117134, "grad_norm": 1.9229157893825306, "learning_rate": 1.6396943028995635e-10, "loss": 0.4693, "step": 6150 }, { "epoch": 0.9924169086802195, "grad_norm": 1.6103109212342395, "learning_rate": 1.57208892295535e-10, "loss": 0.4235, "step": 6151 }, { "epoch": 0.9925782510487254, "grad_norm": 1.6285879432467103, "learning_rate": 1.50590651897875e-10, "loss": 0.4753, "step": 6152 }, { "epoch": 0.9927395934172314, "grad_norm": 1.8008779636320806, "learning_rate": 1.441147109810803e-10, "loss": 0.4622, "step": 6153 }, { "epoch": 0.9929009357857373, "grad_norm": 1.5403445563112146, "learning_rate": 1.3778107138873174e-10, "loss": 0.4753, "step": 6154 }, { "epoch": 0.9930622781542433, "grad_norm": 2.004659351320729, "learning_rate": 1.3158973492394254e-10, "loss": 0.4414, "step": 6155 }, { "epoch": 0.9932236205227493, "grad_norm": 1.9137052864417505, "learning_rate": 1.2554070334930277e-10, "loss": 0.5654, "step": 6156 }, { "epoch": 0.9933849628912552, "grad_norm": 2.725594247540852, "learning_rate": 1.1963397838682387e-10, "loss": 0.5909, "step": 6157 }, { "epoch": 0.9935463052597612, "grad_norm": 1.5467641000778602, "learning_rate": 1.1386956171816064e-10, "loss": 0.53, "step": 6158 }, { "epoch": 0.9937076476282671, "grad_norm": 1.4003490404581278, "learning_rate": 1.0824745498422272e-10, "loss": 0.415, "step": 6159 }, { "epoch": 0.9938689899967732, "grad_norm": 1.2919543110088352, "learning_rate": 1.0276765978561864e-10, "loss": 0.3952, "step": 6160 }, { "epoch": 0.9940303323652792, "grad_norm": 1.1557127761308907, "learning_rate": 9.743017768237826e-11, "loss": 0.4, "step": 6161 }, { "epoch": 0.9941916747337851, "grad_norm": 1.2737614506290103, "learning_rate": 9.223501019389735e-11, "loss": 0.4059, "step": 6162 }, { "epoch": 0.9943530171022911, "grad_norm": 1.7402207430766936, "learning_rate": 8.7182158799326e-11, "loss": 0.4664, "step": 6163 }, { "epoch": 0.994514359470797, "grad_norm": 1.9137464612732504, "learning_rate": 8.227162493695817e-11, "loss": 0.5377, "step": 6164 }, { "epoch": 0.994675701839303, "grad_norm": 2.104580552089165, "learning_rate": 7.750341000489768e-11, "loss": 0.4742, "step": 6165 }, { "epoch": 0.9948370442078089, "grad_norm": 1.436856498980789, "learning_rate": 7.287751536050324e-11, "loss": 0.4976, "step": 6166 }, { "epoch": 0.9949983865763149, "grad_norm": 1.7662949767868852, "learning_rate": 6.839394232066587e-11, "loss": 0.4059, "step": 6167 }, { "epoch": 0.995159728944821, "grad_norm": 1.8402916106018066, "learning_rate": 6.405269216186449e-11, "loss": 0.5471, "step": 6168 }, { "epoch": 0.9953210713133269, "grad_norm": 1.7525290878864006, "learning_rate": 5.985376611999937e-11, "loss": 0.5437, "step": 6169 }, { "epoch": 0.9954824136818329, "grad_norm": 1.2843928519166183, "learning_rate": 5.579716539033663e-11, "loss": 0.395, "step": 6170 }, { "epoch": 0.9956437560503388, "grad_norm": 1.4897452135311642, "learning_rate": 5.1882891127785764e-11, "loss": 0.3234, "step": 6171 }, { "epoch": 0.9958050984188448, "grad_norm": 1.4227334250351182, "learning_rate": 4.811094444673314e-11, "loss": 0.4247, "step": 6172 }, { "epoch": 0.9959664407873507, "grad_norm": 1.6475094175865512, "learning_rate": 4.448132642087543e-11, "loss": 0.5135, "step": 6173 }, { "epoch": 0.9961277831558567, "grad_norm": 1.7014892141990694, "learning_rate": 4.099403808366375e-11, "loss": 0.5059, "step": 6174 }, { "epoch": 0.9962891255243627, "grad_norm": 1.0628072630953174, "learning_rate": 3.7649080427748503e-11, "loss": 0.3794, "step": 6175 }, { "epoch": 0.9964504678928686, "grad_norm": 1.6611608719292026, "learning_rate": 3.4446454405423484e-11, "loss": 0.5755, "step": 6176 }, { "epoch": 0.9966118102613747, "grad_norm": 1.6357313286719795, "learning_rate": 3.138616092840385e-11, "loss": 0.5028, "step": 6177 }, { "epoch": 0.9967731526298806, "grad_norm": 1.8284093521551628, "learning_rate": 2.846820086799262e-11, "loss": 0.5197, "step": 6178 }, { "epoch": 0.9969344949983866, "grad_norm": 1.8168294434139074, "learning_rate": 2.5692575054803156e-11, "loss": 0.4941, "step": 6179 }, { "epoch": 0.9970958373668926, "grad_norm": 1.1845061055996633, "learning_rate": 2.3059284279092205e-11, "loss": 0.3662, "step": 6180 }, { "epoch": 0.9972571797353985, "grad_norm": 1.4544151075707392, "learning_rate": 2.0568329290426843e-11, "loss": 0.3671, "step": 6181 }, { "epoch": 0.9974185221039045, "grad_norm": 2.1455384826551582, "learning_rate": 1.8219710797962028e-11, "loss": 0.6516, "step": 6182 }, { "epoch": 0.9975798644724104, "grad_norm": 1.1831925722585444, "learning_rate": 1.6013429470385086e-11, "loss": 0.3819, "step": 6183 }, { "epoch": 0.9977412068409164, "grad_norm": 2.1046202016604574, "learning_rate": 1.3949485935749184e-11, "loss": 0.7482, "step": 6184 }, { "epoch": 0.9979025492094223, "grad_norm": 1.4917635490169134, "learning_rate": 1.2027880781584343e-11, "loss": 0.462, "step": 6185 }, { "epoch": 0.9980638915779284, "grad_norm": 1.4486786280537727, "learning_rate": 1.0248614555008472e-11, "loss": 0.3282, "step": 6186 }, { "epoch": 0.9982252339464344, "grad_norm": 1.279405289344057, "learning_rate": 8.611687762505315e-12, "loss": 0.4027, "step": 6187 }, { "epoch": 0.9983865763149403, "grad_norm": 2.4943902018520396, "learning_rate": 7.117100870146497e-12, "loss": 0.6318, "step": 6188 }, { "epoch": 0.9985479186834463, "grad_norm": 1.8105192213199623, "learning_rate": 5.76485430331397e-12, "loss": 0.4556, "step": 6189 }, { "epoch": 0.9987092610519522, "grad_norm": 1.1932287066605989, "learning_rate": 4.55494844703308e-12, "loss": 0.3025, "step": 6190 }, { "epoch": 0.9988706034204582, "grad_norm": 2.145417021825893, "learning_rate": 3.48738364580603e-12, "loss": 0.4771, "step": 6191 }, { "epoch": 0.9990319457889641, "grad_norm": 1.6118942098667268, "learning_rate": 2.5621602034453516e-12, "loss": 0.4707, "step": 6192 }, { "epoch": 0.9991932881574701, "grad_norm": 1.3165962693351774, "learning_rate": 1.779278383351457e-12, "loss": 0.3857, "step": 6193 }, { "epoch": 0.9993546305259762, "grad_norm": 1.1710729779112021, "learning_rate": 1.1387384085126406e-12, "loss": 0.4122, "step": 6194 }, { "epoch": 0.9995159728944821, "grad_norm": 1.8092563807686097, "learning_rate": 6.405404611720122e-13, "loss": 0.5576, "step": 6195 }, { "epoch": 0.9996773152629881, "grad_norm": 1.5469239352413968, "learning_rate": 2.8468468316056313e-13, "loss": 0.5307, "step": 6196 }, { "epoch": 0.999838657631494, "grad_norm": 1.2124123236821176, "learning_rate": 7.117117584165555e-14, "loss": 0.2307, "step": 6197 }, { "epoch": 1.0, "grad_norm": 1.5624449910971163, "learning_rate": 0.0, "loss": 0.5041, "step": 6198 }, { "epoch": 1.0, "step": 6198, "total_flos": 3248542671962112.0, "train_loss": 0.47701127381671354, "train_runtime": 533164.508, "train_samples_per_second": 0.186, "train_steps_per_second": 0.012 } ], "logging_steps": 1.0, "max_steps": 6198, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 50, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 3248542671962112.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }